{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "import re\n",
    "import time\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "%matplotlib inline\n",
    "import pandas as pd\n",
    "pd.options.display.max_columns = None\n",
    "pd.options.display.mpl_style = 'default'\n",
    "from nltk.tokenize import word_tokenize\n",
    "from util3 import *"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df_train = pd.read_csv('./data/train.csv', encoding='ISO-8859-1')\n",
    "df_test = pd.read_csv('./data/test.csv', encoding='ISO-8859-1')\n",
    "df_desp = pd.read_csv('./data/product_descriptions.csv', encoding='ISO-8859-1')\n",
    "df_attr = pd.read_csv('./data/attributes.csv', encoding='ISO-8859-1')\n",
    "num_train = df_train.shape[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Attributes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Bullet02                       86248\n",
       "Bullet03                       86226\n",
       "MFG Brand Name                 86220\n",
       "Bullet04                       86174\n",
       "Bullet01                       85940\n",
       "Product Width (in.)            61137\n",
       "Bullet05                       60528\n",
       "Product Height (in.)           54698\n",
       "Product Depth (in.)            53652\n",
       "Product Weight (lb.)           45175\n",
       "Bullet06                       44901\n",
       "Color Family                   41508\n",
       "Bullet07                       34349\n",
       "Material                       31499\n",
       "Color/Finish                   28540\n",
       "Bullet08                       26645\n",
       "Certifications and Listings    24583\n",
       "Bullet09                       20567\n",
       "Assembled Height (in.)         18299\n",
       "Assembled Width (in.)          18263\n",
       "Assembled Depth (in.)          18198\n",
       "Product Length (in.)           16705\n",
       "Bullet10                       14763\n",
       "Indoor/Outdoor                 12939\n",
       "Bullet11                       11784\n",
       "Commercial / Residential        9530\n",
       "Bullet12                        8795\n",
       "ENERGY STAR Certified           8420\n",
       "Hardware Included               7462\n",
       "Package Quantity                6904\n",
       "Name: name, dtype: int64"
      ]
     },
     "execution_count": 86,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_attr['name'].value_counts()[:30]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df_attr.dropna(inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Among the top 30 attributes, some seem to be not very useful. They are:\n",
    "- Certifications and Listings\n",
    "- Package Quantity\n",
    "- Hardware Included  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def filter_str(df, s, col='search_term'):\n",
    "    return df[df[col].str.lower().str.contains(s)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>product_title</th>\n",
       "      <th>product_uid</th>\n",
       "      <th>relevance</th>\n",
       "      <th>search_term</th>\n",
       "      <th>product_description</th>\n",
       "      <th>brand</th>\n",
       "      <th>bullet</th>\n",
       "      <th>bullet_count</th>\n",
       "      <th>color</th>\n",
       "      <th>material</th>\n",
       "      <th>flag_commercial</th>\n",
       "      <th>flag_residential</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>375</th>\n",
       "      <td>1208</td>\n",
       "      <td>Liberty 3-3/4 in. Steel Bar Pull (25-Pack)</td>\n",
       "      <td>100209</td>\n",
       "      <td>2.67</td>\n",
       "      <td>bathroom hardware knobs and pulls</td>\n",
       "      <td>Sleek and sophisticated, this design makes a c...</td>\n",
       "      <td>Liberty</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1416</th>\n",
       "      <td>4355</td>\n",
       "      <td>Martha Stewart Living 3-3/4 in. Bar Cabinet Ha...</td>\n",
       "      <td>100748</td>\n",
       "      <td>2.33</td>\n",
       "      <td>3/4' hardware</td>\n",
       "      <td>The Martha Stewart Living Country 3-3/4 in. Po...</td>\n",
       "      <td>Martha Stewart Living</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1989</th>\n",
       "      <td>6161</td>\n",
       "      <td>Defiant Hartford Satin Nickel Entry Knob</td>\n",
       "      <td>101061</td>\n",
       "      <td>2.67</td>\n",
       "      <td>door lock hardware</td>\n",
       "      <td>Featuring a lifetime guarantee, Defiant meets ...</td>\n",
       "      <td>Defiant</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Stainless steel Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2180</th>\n",
       "      <td>6735</td>\n",
       "      <td>Martha Stewart Living 3-3/4 in. Dowel Cabinet ...</td>\n",
       "      <td>101158</td>\n",
       "      <td>3.00</td>\n",
       "      <td>3/4' hardware</td>\n",
       "      <td>The Martha Stewart Living Country 3-3/4 in. Be...</td>\n",
       "      <td>Martha Stewart Living</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2295</th>\n",
       "      <td>7047</td>\n",
       "      <td>Oz-Post T4-850 4 in. Square Wood Post Anchor (...</td>\n",
       "      <td>101200</td>\n",
       "      <td>1.33</td>\n",
       "      <td>oz metal fence hardware</td>\n",
       "      <td>Oz-Post is one of the best ways to secure a wo...</td>\n",
       "      <td>Oz-Post</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2337</th>\n",
       "      <td>7169</td>\n",
       "      <td>Liberty 2-3/4 in. or 3 in. Newton Cabinet Hard...</td>\n",
       "      <td>101223</td>\n",
       "      <td>2.00</td>\n",
       "      <td>3/4' hardware</td>\n",
       "      <td>The Liberty 2-3/4 or 3 in. Satin Nickel Dual-M...</td>\n",
       "      <td>Liberty</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2339</th>\n",
       "      <td>7172</td>\n",
       "      <td>Liberty 2-3/4 in. or 3 in. Newton Cabinet Hard...</td>\n",
       "      <td>101223</td>\n",
       "      <td>3.00</td>\n",
       "      <td>kitchen cabinet drawer center-mount hardware</td>\n",
       "      <td>The Liberty 2-3/4 or 3 in. Satin Nickel Dual-M...</td>\n",
       "      <td>Liberty</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2706</th>\n",
       "      <td>8370</td>\n",
       "      <td>HDX 4 ft. x 100 ft. 14-Gauge Welded Wire</td>\n",
       "      <td>101419</td>\n",
       "      <td>2.00</td>\n",
       "      <td>wire fences hardware</td>\n",
       "      <td>The HDX 4 ft. x 100 ft. Welded Wire is made of...</td>\n",
       "      <td>HDX</td>\n",
       "      <td>Silver</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Silver</td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2965</th>\n",
       "      <td>9189</td>\n",
       "      <td>Liberty 2-1/2 in. or 3 in. Dark Oil Rubbed Bro...</td>\n",
       "      <td>101566</td>\n",
       "      <td>2.00</td>\n",
       "      <td>kitchen hardware</td>\n",
       "      <td>Use the Liberty Hardware 2-1/2 or 3 in. Dual M...</td>\n",
       "      <td>Liberty</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2975</th>\n",
       "      <td>9221</td>\n",
       "      <td>Oz-Post Steel 2 Wood Fence Bracket Project Pac...</td>\n",
       "      <td>101571</td>\n",
       "      <td>2.33</td>\n",
       "      <td>wap around hardware</td>\n",
       "      <td>The WAP-238 from OZCO is a galvanized bracket ...</td>\n",
       "      <td>Oz-Post</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2982</th>\n",
       "      <td>9244</td>\n",
       "      <td>HDX 1/4 in. x 2 ft. x 5 ft. Hardware Cloth</td>\n",
       "      <td>101575</td>\n",
       "      <td>2.00</td>\n",
       "      <td>wire fences hardware</td>\n",
       "      <td>A lightweight, flexible and economical wire me...</td>\n",
       "      <td>HDX</td>\n",
       "      <td>Silver</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Silver</td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3300</th>\n",
       "      <td>10232</td>\n",
       "      <td>Schlage Plymouth Double Cylinder Antique Brass...</td>\n",
       "      <td>101755</td>\n",
       "      <td>3.00</td>\n",
       "      <td>interior door hardware by schlage</td>\n",
       "      <td>The Plymouth front entry features a simple cur...</td>\n",
       "      <td>Schlage</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Solid Brass</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3421</th>\n",
       "      <td>10576</td>\n",
       "      <td>Liberty 6-2/7 in. Steel Bar Cabinet Hardware A...</td>\n",
       "      <td>101831</td>\n",
       "      <td>2.00</td>\n",
       "      <td>kitchen cabinet drawer center-mount hardware</td>\n",
       "      <td>Sleek and sophisticated, this design makes a c...</td>\n",
       "      <td>Liberty</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3422</th>\n",
       "      <td>10577</td>\n",
       "      <td>Liberty 6-2/7 in. Steel Bar Cabinet Hardware A...</td>\n",
       "      <td>101831</td>\n",
       "      <td>2.00</td>\n",
       "      <td>kitchen cabinte hardware blue knob</td>\n",
       "      <td>Sleek and sophisticated, this design makes a c...</td>\n",
       "      <td>Liberty</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3423</th>\n",
       "      <td>10578</td>\n",
       "      <td>Liberty 6-2/7 in. Steel Bar Cabinet Hardware A...</td>\n",
       "      <td>101831</td>\n",
       "      <td>3.00</td>\n",
       "      <td>liberty campaign hardware</td>\n",
       "      <td>Sleek and sophisticated, this design makes a c...</td>\n",
       "      <td>Liberty</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3775</th>\n",
       "      <td>11728</td>\n",
       "      <td>Veranda 1-1/2 oz. Vinyl Fence Cement</td>\n",
       "      <td>102046</td>\n",
       "      <td>2.67</td>\n",
       "      <td>vinyl fence hardware</td>\n",
       "      <td>Veranda PVC cement glue is specifically design...</td>\n",
       "      <td>Veranda</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3792</th>\n",
       "      <td>11788</td>\n",
       "      <td>HDX 28 in. x 50 ft. Garden Fence</td>\n",
       "      <td>102057</td>\n",
       "      <td>2.00</td>\n",
       "      <td>wire fences hardware</td>\n",
       "      <td>If rabbits and other varmints are getting to y...</td>\n",
       "      <td>HDX</td>\n",
       "      <td>Silver</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Silver</td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3933</th>\n",
       "      <td>12218</td>\n",
       "      <td>YARDGARD 5 ft. x 50 ft. 14-Gauge Vinyl Galvani...</td>\n",
       "      <td>102139</td>\n",
       "      <td>1.67</td>\n",
       "      <td>wire fences hardware</td>\n",
       "      <td>Welded wire is a general purpose fence providi...</td>\n",
       "      <td>YARDGARD</td>\n",
       "      <td>Green</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Green</td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4159</th>\n",
       "      <td>13026</td>\n",
       "      <td>Tenax 3 ft. x 15 ft. Plastic Black Hardware Net</td>\n",
       "      <td>102276</td>\n",
       "      <td>2.33</td>\n",
       "      <td>wire fences hardware</td>\n",
       "      <td>Replaces metal hardware net under must applica...</td>\n",
       "      <td>Tenax</td>\n",
       "      <td>Black Black</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Black Black</td>\n",
       "      <td>Plastic</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4613</th>\n",
       "      <td>14452</td>\n",
       "      <td>Liberty 3-3/4 in. Steel Bar Cabinet Hardware Pull</td>\n",
       "      <td>102520</td>\n",
       "      <td>1.33</td>\n",
       "      <td>liberty campaign hardware</td>\n",
       "      <td>Sleek and sophisticated, this design makes a c...</td>\n",
       "      <td>Liberty</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4844</th>\n",
       "      <td>15184</td>\n",
       "      <td>Oz-Post Steel 2 Wood Fence Bracket WAP-OZ</td>\n",
       "      <td>102651</td>\n",
       "      <td>2.67</td>\n",
       "      <td>oz metal fence hardware</td>\n",
       "      <td>The remarkable WAP-OZ fence bracket from OZCO ...</td>\n",
       "      <td>Oz-Post</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5260</th>\n",
       "      <td>16453</td>\n",
       "      <td>Martha Stewart Living 3 in. Bedford Nickel Cyl...</td>\n",
       "      <td>102905</td>\n",
       "      <td>2.00</td>\n",
       "      <td>kitchen hardware</td>\n",
       "      <td>The Martha Stewart Living Modern 3 in. Bedford...</td>\n",
       "      <td>Martha Stewart Living</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6770</th>\n",
       "      <td>21202</td>\n",
       "      <td>Hickory Hardware Oil-Rubbed Bronze Surface Sel...</td>\n",
       "      <td>103960</td>\n",
       "      <td>2.00</td>\n",
       "      <td>hickory hardware 469999035</td>\n",
       "      <td>Update your cabinetry with the Hickory Hardwar...</td>\n",
       "      <td>Hickory Hardware</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6835</th>\n",
       "      <td>21419</td>\n",
       "      <td>Defiant Springfield Satin Nickel Mushroom Hand...</td>\n",
       "      <td>104017</td>\n",
       "      <td>3.00</td>\n",
       "      <td>front door hardware</td>\n",
       "      <td>Reinforce your entry door with the Defiant Spr...</td>\n",
       "      <td>Defiant</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Solid Brass</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6909</th>\n",
       "      <td>21626</td>\n",
       "      <td>HDX 1 in. x 4 ft. x 50 ft. Poultry Netting</td>\n",
       "      <td>104079</td>\n",
       "      <td>2.00</td>\n",
       "      <td>wire fences hardware</td>\n",
       "      <td>The HDX 1 in. x 4 ft. x 50 ft. 20-Gauge Galvan...</td>\n",
       "      <td>HDX</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Galvanized Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6923</th>\n",
       "      <td>21680</td>\n",
       "      <td>Everbilt Anti-Sag Gate Kit</td>\n",
       "      <td>104092</td>\n",
       "      <td>2.00</td>\n",
       "      <td>fence gate hardware eyebolt</td>\n",
       "      <td>The Everbilt Anti-Sag Gate Kit is ideal to eli...</td>\n",
       "      <td>Everbilt</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7209</th>\n",
       "      <td>22570</td>\n",
       "      <td>ClosetMaid Preloaded Wall Brackets for SuperSl...</td>\n",
       "      <td>104340</td>\n",
       "      <td>2.00</td>\n",
       "      <td>shelves wood hardware</td>\n",
       "      <td>This set of 2 ClosetMaid Wall Brackets is desi...</td>\n",
       "      <td>ClosetMaid</td>\n",
       "      <td>White White</td>\n",
       "      <td>2.0</td>\n",
       "      <td>White White</td>\n",
       "      <td>Resin</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7363</th>\n",
       "      <td>23045</td>\n",
       "      <td>Veranda Aluminum Rail Bracket for Vinyl Fencin...</td>\n",
       "      <td>104469</td>\n",
       "      <td>3.00</td>\n",
       "      <td>vinyl fence hardware</td>\n",
       "      <td>Choose Veranda's 2 in. x 3 in. Fence Rail Brac...</td>\n",
       "      <td>Veranda</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7461</th>\n",
       "      <td>23327</td>\n",
       "      <td>Veranda Shadowbox White Vinyl Fence Bracket Kit</td>\n",
       "      <td>104547</td>\n",
       "      <td>3.00</td>\n",
       "      <td>vinyl fence hardware</td>\n",
       "      <td>Veranda vinyl fencing is The Home Depot's prem...</td>\n",
       "      <td>Veranda</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7475</th>\n",
       "      <td>23381</td>\n",
       "      <td>Liberty Satin Nickel 1-3/8 in. Large Football ...</td>\n",
       "      <td>104569</td>\n",
       "      <td>3.00</td>\n",
       "      <td>bathroom hardware knobs and pulls</td>\n",
       "      <td>Use the Liberty 1-3/8 in. Satin Nickel Large F...</td>\n",
       "      <td>nobrand</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220714</th>\n",
       "      <td>220324</td>\n",
       "      <td>PartsmasterPro Universal Designer Lever Handle...</td>\n",
       "      <td>205663</td>\n",
       "      <td>NaN</td>\n",
       "      <td>porcelain handle hardware</td>\n",
       "      <td>Replacing your faucets handles makes your fauc...</td>\n",
       "      <td>PartsmasterPro</td>\n",
       "      <td>Chrome Chrome</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Chrome Chrome</td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222464</th>\n",
       "      <td>222465</td>\n",
       "      <td>Stanley-National Hardware 4-1/2 in. Template H...</td>\n",
       "      <td>207497</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hardware template</td>\n",
       "      <td>Whether for home, farm, builder or industrial ...</td>\n",
       "      <td>Stanley-National Hardware</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223589</th>\n",
       "      <td>223590</td>\n",
       "      <td>Prime-Line 3/8 in. Bi-Fold Door Pivot Set</td>\n",
       "      <td>208443</td>\n",
       "      <td>NaN</td>\n",
       "      <td>bi fold door hardware</td>\n",
       "      <td>This Prime-Line Products 3/8 in. Bifold Door P...</td>\n",
       "      <td>Prime-Line</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Plastic</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223642</th>\n",
       "      <td>223643</td>\n",
       "      <td>Barton Kramer Bi-Fold Jamb Bracket Closet Door...</td>\n",
       "      <td>208491</td>\n",
       "      <td>NaN</td>\n",
       "      <td>closet hardware</td>\n",
       "      <td>This bi-fold closet door jamb bracket is desig...</td>\n",
       "      <td>Barton Kramer</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223643</th>\n",
       "      <td>223644</td>\n",
       "      <td>Barton Kramer Bi-Fold Jamb Bracket Closet Door...</td>\n",
       "      <td>208491</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hardware brackets</td>\n",
       "      <td>This bi-fold closet door jamb bracket is desig...</td>\n",
       "      <td>Barton Kramer</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223774</th>\n",
       "      <td>223775</td>\n",
       "      <td>Stainless Glide Stainless Steel Top Mount Roll...</td>\n",
       "      <td>208596</td>\n",
       "      <td>NaN</td>\n",
       "      <td>stainless steel hardware</td>\n",
       "      <td>The Stainless Glide Stainless Steel Rolling Do...</td>\n",
       "      <td>Stainless Glide</td>\n",
       "      <td>Stainless Steel</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Stainless Steel</td>\n",
       "      <td>Stainless Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224771</th>\n",
       "      <td>224772</td>\n",
       "      <td>Canine Hardware 30 in. x 39 in. Pet Travel Bed</td>\n",
       "      <td>209453</td>\n",
       "      <td>NaN</td>\n",
       "      <td>murphy bed hardware</td>\n",
       "      <td>This travel bed's plush look and feel will als...</td>\n",
       "      <td>Canine Hardware</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Other</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226311</th>\n",
       "      <td>226312</td>\n",
       "      <td>Martha Stewart Living Bedford 3 in. Nickel Can...</td>\n",
       "      <td>210797</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hardware</td>\n",
       "      <td>The Martha Stewart Living 3 in. Bedford Nickel...</td>\n",
       "      <td>Martha Stewart Living</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226361</th>\n",
       "      <td>226362</td>\n",
       "      <td>Prime-Line Bi-Fold Door Top Guide Wheel, 5/8 i...</td>\n",
       "      <td>210841</td>\n",
       "      <td>NaN</td>\n",
       "      <td>bi fold door hardware</td>\n",
       "      <td>This bi-fold door pivot is constructed from st...</td>\n",
       "      <td>Prime-Line</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226826</th>\n",
       "      <td>226827</td>\n",
       "      <td>Stainless Glide Stainless Steel Dual Wheel Str...</td>\n",
       "      <td>211265</td>\n",
       "      <td>NaN</td>\n",
       "      <td>stainless steel hardware</td>\n",
       "      <td>The Stainless Glide Stainless Steel Rolling Do...</td>\n",
       "      <td>Stainless Glide</td>\n",
       "      <td>Stainless Steel</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Stainless Steel</td>\n",
       "      <td>Stainless Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226881</th>\n",
       "      <td>226882</td>\n",
       "      <td>Prime-Line Solid Brass Pocket Door Combination...</td>\n",
       "      <td>211314</td>\n",
       "      <td>NaN</td>\n",
       "      <td>pocket door hardware</td>\n",
       "      <td>This mortise latch unit features recessed grip...</td>\n",
       "      <td>Prime-Line</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Solid Brass</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227150</th>\n",
       "      <td>227151</td>\n",
       "      <td>Young House Love 3 in. Vintage Style Cocoa Bro...</td>\n",
       "      <td>211554</td>\n",
       "      <td>NaN</td>\n",
       "      <td>liberty campaign hardware</td>\n",
       "      <td>From Liberty Hardware and Young House Love, th...</td>\n",
       "      <td>Young House Love</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228862</th>\n",
       "      <td>228863</td>\n",
       "      <td>Continental Home Hardware 1-3/4 in. Satin Nick...</td>\n",
       "      <td>213100</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3/4' hardware</td>\n",
       "      <td>Thomasville  Hardware brings a customized hard...</td>\n",
       "      <td>Continental Home Hardware</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>229742</th>\n",
       "      <td>229743</td>\n",
       "      <td>Stanley-National Hardware 6 in. Professional C...</td>\n",
       "      <td>213907</td>\n",
       "      <td>NaN</td>\n",
       "      <td>gate hardware kit</td>\n",
       "      <td>Stanley-National Hardware has been a leading m...</td>\n",
       "      <td>Stanley-National Hardware</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>229999</th>\n",
       "      <td>230000</td>\n",
       "      <td>Prime-Line 3/4 in. Flat Nylon Wheel Bi-Fold Do...</td>\n",
       "      <td>214140</td>\n",
       "      <td>NaN</td>\n",
       "      <td>pocket door hardware</td>\n",
       "      <td>This wardrobe door roller is constructed from ...</td>\n",
       "      <td>Prime-Line</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Other</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>230047</th>\n",
       "      <td>230048</td>\n",
       "      <td>Kwikset Arlington Single Cylinder Antique Bras...</td>\n",
       "      <td>214184</td>\n",
       "      <td>NaN</td>\n",
       "      <td>front door hardware</td>\n",
       "      <td>Step up to designer styles and superior securi...</td>\n",
       "      <td>Kwikset</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>230825</th>\n",
       "      <td>230826</td>\n",
       "      <td>PlayStar All Star Build It Yourself Gold Plays...</td>\n",
       "      <td>214889</td>\n",
       "      <td>NaN</td>\n",
       "      <td>lumber hardware</td>\n",
       "      <td>The All Star XP Gold Design has 12 sq. ft. of ...</td>\n",
       "      <td>PlayStar</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Galvanized Steel Metal Plastic/Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>231360</th>\n",
       "      <td>231361</td>\n",
       "      <td>Barton Kramer Johnson Hardware Bi-Fold Door Bo...</td>\n",
       "      <td>215386</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hardware brackets</td>\n",
       "      <td>This bi-fold door bottom pivot and bracket is ...</td>\n",
       "      <td>Barton Kramer</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>232910</th>\n",
       "      <td>232911</td>\n",
       "      <td>Prime-Line Closet Pole Sockets, 1-3/8 in., Pla...</td>\n",
       "      <td>216836</td>\n",
       "      <td>NaN</td>\n",
       "      <td>closet hardware</td>\n",
       "      <td>These pole sockets are constructed from sturdy...</td>\n",
       "      <td>Prime-Line</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Plastic</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>233124</th>\n",
       "      <td>233125</td>\n",
       "      <td>Richelieu Hardware 4-15/16 in. Furniture Leg</td>\n",
       "      <td>217037</td>\n",
       "      <td>NaN</td>\n",
       "      <td>murphy bed hardware</td>\n",
       "      <td>An ideal way to update your furniture, the Ric...</td>\n",
       "      <td>Richelieu Hardware</td>\n",
       "      <td>Matte Black</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Matte Black</td>\n",
       "      <td>Other</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>233302</th>\n",
       "      <td>233303</td>\n",
       "      <td>Hickory Hardware Studio Collection 1 in. Oil-R...</td>\n",
       "      <td>217204</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hickory hardware studio</td>\n",
       "      <td>This Hickory hardware Studio Collection 1 in. ...</td>\n",
       "      <td>Hickory Hardware</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>234873</th>\n",
       "      <td>234874</td>\n",
       "      <td>Liberty 3-3/4 in. Plaza Cabinet Hardware Pull</td>\n",
       "      <td>218685</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3/4' hardware</td>\n",
       "      <td>The Liberty 3-3/4 in. Brushed Satin-Nickel Pla...</td>\n",
       "      <td>Liberty</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>235233</th>\n",
       "      <td>235234</td>\n",
       "      <td>National Hardware Vinyl Fence Gate Kit in Whit...</td>\n",
       "      <td>219029</td>\n",
       "      <td>NaN</td>\n",
       "      <td>vinyl fence hardware</td>\n",
       "      <td>Whether for home, farm, builder or industrial ...</td>\n",
       "      <td>National Hardware</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Steel</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>235587</th>\n",
       "      <td>235588</td>\n",
       "      <td>Richelieu Hardware 8 in. x 10 in. White Enamel...</td>\n",
       "      <td>219367</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hardware brackets</td>\n",
       "      <td>Onward products offer unique and creative hard...</td>\n",
       "      <td>Richelieu Hardware</td>\n",
       "      <td>White White</td>\n",
       "      <td>2.0</td>\n",
       "      <td>White White</td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>235701</th>\n",
       "      <td>235702</td>\n",
       "      <td>Liberty Cabinet Drawer Hardware Installation T...</td>\n",
       "      <td>219477</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hardware template</td>\n",
       "      <td>This Liberty Cabinet Hardware Installation Tem...</td>\n",
       "      <td>Liberty</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Plastic</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>236106</th>\n",
       "      <td>236107</td>\n",
       "      <td>Rustica Hardware 42 in. x 84 in. Modern Range ...</td>\n",
       "      <td>219865</td>\n",
       "      <td>NaN</td>\n",
       "      <td>sliding cabinet door hardware</td>\n",
       "      <td>As unique as your fingerprints are to you, so ...</td>\n",
       "      <td>nobrand</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>237028</th>\n",
       "      <td>237029</td>\n",
       "      <td>Everbilt Heavy Duty 36 in. Pocket Door Frame Set</td>\n",
       "      <td>220759</td>\n",
       "      <td>NaN</td>\n",
       "      <td>pocket door hardware</td>\n",
       "      <td>The Everbilt 36 in. Heavy Duty Pocket Door Fra...</td>\n",
       "      <td>Everbilt</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Aluminum</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>237151</th>\n",
       "      <td>237152</td>\n",
       "      <td>Hickory Hardware Studio 1-1/4 in. Oil Rubbed B...</td>\n",
       "      <td>220878</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hickory hardware studio</td>\n",
       "      <td>Bold style and functionality are combined in t...</td>\n",
       "      <td>Hickory Hardware</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Metal</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>237739</th>\n",
       "      <td>237740</td>\n",
       "      <td>Johnson Hardware 111FD Series 72 in. Track and...</td>\n",
       "      <td>221455</td>\n",
       "      <td>NaN</td>\n",
       "      <td>bi fold door hardware</td>\n",
       "      <td>The Johnson Hardware 111FD Series 72 in. Track...</td>\n",
       "      <td>Johnson Hardware</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Aluminum</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>239861</th>\n",
       "      <td>239862</td>\n",
       "      <td>Liberty 1-1/4 in. Hollow Cabinet Hardware Knob</td>\n",
       "      <td>223535</td>\n",
       "      <td>NaN</td>\n",
       "      <td>hardware knob</td>\n",
       "      <td>The clean lines of this knob fit several desig...</td>\n",
       "      <td>Liberty</td>\n",
       "      <td></td>\n",
       "      <td>0.0</td>\n",
       "      <td></td>\n",
       "      <td>Ceramic</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>742 rows × 13 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            id                                      product_title  \\\n",
       "375       1208         Liberty 3-3/4 in. Steel Bar Pull (25-Pack)   \n",
       "1416      4355  Martha Stewart Living 3-3/4 in. Bar Cabinet Ha...   \n",
       "1989      6161           Defiant Hartford Satin Nickel Entry Knob   \n",
       "2180      6735  Martha Stewart Living 3-3/4 in. Dowel Cabinet ...   \n",
       "2295      7047  Oz-Post T4-850 4 in. Square Wood Post Anchor (...   \n",
       "2337      7169  Liberty 2-3/4 in. or 3 in. Newton Cabinet Hard...   \n",
       "2339      7172  Liberty 2-3/4 in. or 3 in. Newton Cabinet Hard...   \n",
       "2706      8370           HDX 4 ft. x 100 ft. 14-Gauge Welded Wire   \n",
       "2965      9189  Liberty 2-1/2 in. or 3 in. Dark Oil Rubbed Bro...   \n",
       "2975      9221  Oz-Post Steel 2 Wood Fence Bracket Project Pac...   \n",
       "2982      9244         HDX 1/4 in. x 2 ft. x 5 ft. Hardware Cloth   \n",
       "3300     10232  Schlage Plymouth Double Cylinder Antique Brass...   \n",
       "3421     10576  Liberty 6-2/7 in. Steel Bar Cabinet Hardware A...   \n",
       "3422     10577  Liberty 6-2/7 in. Steel Bar Cabinet Hardware A...   \n",
       "3423     10578  Liberty 6-2/7 in. Steel Bar Cabinet Hardware A...   \n",
       "3775     11728               Veranda 1-1/2 oz. Vinyl Fence Cement   \n",
       "3792     11788                   HDX 28 in. x 50 ft. Garden Fence   \n",
       "3933     12218  YARDGARD 5 ft. x 50 ft. 14-Gauge Vinyl Galvani...   \n",
       "4159     13026    Tenax 3 ft. x 15 ft. Plastic Black Hardware Net   \n",
       "4613     14452  Liberty 3-3/4 in. Steel Bar Cabinet Hardware Pull   \n",
       "4844     15184          Oz-Post Steel 2 Wood Fence Bracket WAP-OZ   \n",
       "5260     16453  Martha Stewart Living 3 in. Bedford Nickel Cyl...   \n",
       "6770     21202  Hickory Hardware Oil-Rubbed Bronze Surface Sel...   \n",
       "6835     21419  Defiant Springfield Satin Nickel Mushroom Hand...   \n",
       "6909     21626         HDX 1 in. x 4 ft. x 50 ft. Poultry Netting   \n",
       "6923     21680                         Everbilt Anti-Sag Gate Kit   \n",
       "7209     22570  ClosetMaid Preloaded Wall Brackets for SuperSl...   \n",
       "7363     23045  Veranda Aluminum Rail Bracket for Vinyl Fencin...   \n",
       "7461     23327    Veranda Shadowbox White Vinyl Fence Bracket Kit   \n",
       "7475     23381  Liberty Satin Nickel 1-3/8 in. Large Football ...   \n",
       "...        ...                                                ...   \n",
       "220714  220324  PartsmasterPro Universal Designer Lever Handle...   \n",
       "222464  222465  Stanley-National Hardware 4-1/2 in. Template H...   \n",
       "223589  223590          Prime-Line 3/8 in. Bi-Fold Door Pivot Set   \n",
       "223642  223643  Barton Kramer Bi-Fold Jamb Bracket Closet Door...   \n",
       "223643  223644  Barton Kramer Bi-Fold Jamb Bracket Closet Door...   \n",
       "223774  223775  Stainless Glide Stainless Steel Top Mount Roll...   \n",
       "224771  224772     Canine Hardware 30 in. x 39 in. Pet Travel Bed   \n",
       "226311  226312  Martha Stewart Living Bedford 3 in. Nickel Can...   \n",
       "226361  226362  Prime-Line Bi-Fold Door Top Guide Wheel, 5/8 i...   \n",
       "226826  226827  Stainless Glide Stainless Steel Dual Wheel Str...   \n",
       "226881  226882  Prime-Line Solid Brass Pocket Door Combination...   \n",
       "227150  227151  Young House Love 3 in. Vintage Style Cocoa Bro...   \n",
       "228862  228863  Continental Home Hardware 1-3/4 in. Satin Nick...   \n",
       "229742  229743  Stanley-National Hardware 6 in. Professional C...   \n",
       "229999  230000  Prime-Line 3/4 in. Flat Nylon Wheel Bi-Fold Do...   \n",
       "230047  230048  Kwikset Arlington Single Cylinder Antique Bras...   \n",
       "230825  230826  PlayStar All Star Build It Yourself Gold Plays...   \n",
       "231360  231361  Barton Kramer Johnson Hardware Bi-Fold Door Bo...   \n",
       "232910  232911  Prime-Line Closet Pole Sockets, 1-3/8 in., Pla...   \n",
       "233124  233125       Richelieu Hardware 4-15/16 in. Furniture Leg   \n",
       "233302  233303  Hickory Hardware Studio Collection 1 in. Oil-R...   \n",
       "234873  234874      Liberty 3-3/4 in. Plaza Cabinet Hardware Pull   \n",
       "235233  235234  National Hardware Vinyl Fence Gate Kit in Whit...   \n",
       "235587  235588  Richelieu Hardware 8 in. x 10 in. White Enamel...   \n",
       "235701  235702  Liberty Cabinet Drawer Hardware Installation T...   \n",
       "236106  236107  Rustica Hardware 42 in. x 84 in. Modern Range ...   \n",
       "237028  237029   Everbilt Heavy Duty 36 in. Pocket Door Frame Set   \n",
       "237151  237152  Hickory Hardware Studio 1-1/4 in. Oil Rubbed B...   \n",
       "237739  237740  Johnson Hardware 111FD Series 72 in. Track and...   \n",
       "239861  239862     Liberty 1-1/4 in. Hollow Cabinet Hardware Knob   \n",
       "\n",
       "        product_uid  relevance                                   search_term  \\\n",
       "375          100209       2.67             bathroom hardware knobs and pulls   \n",
       "1416         100748       2.33                                 3/4' hardware   \n",
       "1989         101061       2.67                            door lock hardware   \n",
       "2180         101158       3.00                                 3/4' hardware   \n",
       "2295         101200       1.33                       oz metal fence hardware   \n",
       "2337         101223       2.00                                 3/4' hardware   \n",
       "2339         101223       3.00  kitchen cabinet drawer center-mount hardware   \n",
       "2706         101419       2.00                          wire fences hardware   \n",
       "2965         101566       2.00                              kitchen hardware   \n",
       "2975         101571       2.33                           wap around hardware   \n",
       "2982         101575       2.00                          wire fences hardware   \n",
       "3300         101755       3.00             interior door hardware by schlage   \n",
       "3421         101831       2.00  kitchen cabinet drawer center-mount hardware   \n",
       "3422         101831       2.00            kitchen cabinte hardware blue knob   \n",
       "3423         101831       3.00                     liberty campaign hardware   \n",
       "3775         102046       2.67                          vinyl fence hardware   \n",
       "3792         102057       2.00                          wire fences hardware   \n",
       "3933         102139       1.67                          wire fences hardware   \n",
       "4159         102276       2.33                          wire fences hardware   \n",
       "4613         102520       1.33                     liberty campaign hardware   \n",
       "4844         102651       2.67                       oz metal fence hardware   \n",
       "5260         102905       2.00                              kitchen hardware   \n",
       "6770         103960       2.00                    hickory hardware 469999035   \n",
       "6835         104017       3.00                           front door hardware   \n",
       "6909         104079       2.00                          wire fences hardware   \n",
       "6923         104092       2.00                   fence gate hardware eyebolt   \n",
       "7209         104340       2.00                         shelves wood hardware   \n",
       "7363         104469       3.00                          vinyl fence hardware   \n",
       "7461         104547       3.00                          vinyl fence hardware   \n",
       "7475         104569       3.00             bathroom hardware knobs and pulls   \n",
       "...             ...        ...                                           ...   \n",
       "220714       205663        NaN                     porcelain handle hardware   \n",
       "222464       207497        NaN                             hardware template   \n",
       "223589       208443        NaN                         bi fold door hardware   \n",
       "223642       208491        NaN                               closet hardware   \n",
       "223643       208491        NaN                             hardware brackets   \n",
       "223774       208596        NaN                      stainless steel hardware   \n",
       "224771       209453        NaN                           murphy bed hardware   \n",
       "226311       210797        NaN                                      hardware   \n",
       "226361       210841        NaN                         bi fold door hardware   \n",
       "226826       211265        NaN                      stainless steel hardware   \n",
       "226881       211314        NaN                          pocket door hardware   \n",
       "227150       211554        NaN                     liberty campaign hardware   \n",
       "228862       213100        NaN                                 3/4' hardware   \n",
       "229742       213907        NaN                             gate hardware kit   \n",
       "229999       214140        NaN                          pocket door hardware   \n",
       "230047       214184        NaN                           front door hardware   \n",
       "230825       214889        NaN                               lumber hardware   \n",
       "231360       215386        NaN                             hardware brackets   \n",
       "232910       216836        NaN                               closet hardware   \n",
       "233124       217037        NaN                           murphy bed hardware   \n",
       "233302       217204        NaN                       hickory hardware studio   \n",
       "234873       218685        NaN                                 3/4' hardware   \n",
       "235233       219029        NaN                          vinyl fence hardware   \n",
       "235587       219367        NaN                             hardware brackets   \n",
       "235701       219477        NaN                             hardware template   \n",
       "236106       219865        NaN                 sliding cabinet door hardware   \n",
       "237028       220759        NaN                          pocket door hardware   \n",
       "237151       220878        NaN                       hickory hardware studio   \n",
       "237739       221455        NaN                         bi fold door hardware   \n",
       "239861       223535        NaN                                 hardware knob   \n",
       "\n",
       "                                      product_description  \\\n",
       "375     Sleek and sophisticated, this design makes a c...   \n",
       "1416    The Martha Stewart Living Country 3-3/4 in. Po...   \n",
       "1989    Featuring a lifetime guarantee, Defiant meets ...   \n",
       "2180    The Martha Stewart Living Country 3-3/4 in. Be...   \n",
       "2295    Oz-Post is one of the best ways to secure a wo...   \n",
       "2337    The Liberty 2-3/4 or 3 in. Satin Nickel Dual-M...   \n",
       "2339    The Liberty 2-3/4 or 3 in. Satin Nickel Dual-M...   \n",
       "2706    The HDX 4 ft. x 100 ft. Welded Wire is made of...   \n",
       "2965    Use the Liberty Hardware 2-1/2 or 3 in. Dual M...   \n",
       "2975    The WAP-238 from OZCO is a galvanized bracket ...   \n",
       "2982    A lightweight, flexible and economical wire me...   \n",
       "3300    The Plymouth front entry features a simple cur...   \n",
       "3421    Sleek and sophisticated, this design makes a c...   \n",
       "3422    Sleek and sophisticated, this design makes a c...   \n",
       "3423    Sleek and sophisticated, this design makes a c...   \n",
       "3775    Veranda PVC cement glue is specifically design...   \n",
       "3792    If rabbits and other varmints are getting to y...   \n",
       "3933    Welded wire is a general purpose fence providi...   \n",
       "4159    Replaces metal hardware net under must applica...   \n",
       "4613    Sleek and sophisticated, this design makes a c...   \n",
       "4844    The remarkable WAP-OZ fence bracket from OZCO ...   \n",
       "5260    The Martha Stewart Living Modern 3 in. Bedford...   \n",
       "6770    Update your cabinetry with the Hickory Hardwar...   \n",
       "6835    Reinforce your entry door with the Defiant Spr...   \n",
       "6909    The HDX 1 in. x 4 ft. x 50 ft. 20-Gauge Galvan...   \n",
       "6923    The Everbilt Anti-Sag Gate Kit is ideal to eli...   \n",
       "7209    This set of 2 ClosetMaid Wall Brackets is desi...   \n",
       "7363    Choose Veranda's 2 in. x 3 in. Fence Rail Brac...   \n",
       "7461    Veranda vinyl fencing is The Home Depot's prem...   \n",
       "7475    Use the Liberty 1-3/8 in. Satin Nickel Large F...   \n",
       "...                                                   ...   \n",
       "220714  Replacing your faucets handles makes your fauc...   \n",
       "222464  Whether for home, farm, builder or industrial ...   \n",
       "223589  This Prime-Line Products 3/8 in. Bifold Door P...   \n",
       "223642  This bi-fold closet door jamb bracket is desig...   \n",
       "223643  This bi-fold closet door jamb bracket is desig...   \n",
       "223774  The Stainless Glide Stainless Steel Rolling Do...   \n",
       "224771  This travel bed's plush look and feel will als...   \n",
       "226311  The Martha Stewart Living 3 in. Bedford Nickel...   \n",
       "226361  This bi-fold door pivot is constructed from st...   \n",
       "226826  The Stainless Glide Stainless Steel Rolling Do...   \n",
       "226881  This mortise latch unit features recessed grip...   \n",
       "227150  From Liberty Hardware and Young House Love, th...   \n",
       "228862  Thomasville  Hardware brings a customized hard...   \n",
       "229742  Stanley-National Hardware has been a leading m...   \n",
       "229999  This wardrobe door roller is constructed from ...   \n",
       "230047  Step up to designer styles and superior securi...   \n",
       "230825  The All Star XP Gold Design has 12 sq. ft. of ...   \n",
       "231360  This bi-fold door bottom pivot and bracket is ...   \n",
       "232910  These pole sockets are constructed from sturdy...   \n",
       "233124  An ideal way to update your furniture, the Ric...   \n",
       "233302  This Hickory hardware Studio Collection 1 in. ...   \n",
       "234873  The Liberty 3-3/4 in. Brushed Satin-Nickel Pla...   \n",
       "235233  Whether for home, farm, builder or industrial ...   \n",
       "235587  Onward products offer unique and creative hard...   \n",
       "235701  This Liberty Cabinet Hardware Installation Tem...   \n",
       "236106  As unique as your fingerprints are to you, so ...   \n",
       "237028  The Everbilt 36 in. Heavy Duty Pocket Door Fra...   \n",
       "237151  Bold style and functionality are combined in t...   \n",
       "237739  The Johnson Hardware 111FD Series 72 in. Track...   \n",
       "239861  The clean lines of this knob fit several desig...   \n",
       "\n",
       "                            brand            bullet  bullet_count  \\\n",
       "375                       Liberty                             0.0   \n",
       "1416        Martha Stewart Living                             0.0   \n",
       "1989                      Defiant                             0.0   \n",
       "2180        Martha Stewart Living                             0.0   \n",
       "2295                      Oz-Post                             0.0   \n",
       "2337                      Liberty                             0.0   \n",
       "2339                      Liberty                             0.0   \n",
       "2706                          HDX            Silver           1.0   \n",
       "2965                      Liberty                             0.0   \n",
       "2975                      Oz-Post                             0.0   \n",
       "2982                          HDX            Silver           1.0   \n",
       "3300                      Schlage                             0.0   \n",
       "3421                      Liberty                             0.0   \n",
       "3422                      Liberty                             0.0   \n",
       "3423                      Liberty                             0.0   \n",
       "3775                      Veranda                             0.0   \n",
       "3792                          HDX            Silver           1.0   \n",
       "3933                     YARDGARD             Green           1.0   \n",
       "4159                        Tenax       Black Black           2.0   \n",
       "4613                      Liberty                             0.0   \n",
       "4844                      Oz-Post                             0.0   \n",
       "5260        Martha Stewart Living                             0.0   \n",
       "6770             Hickory Hardware                             0.0   \n",
       "6835                      Defiant                             0.0   \n",
       "6909                          HDX                             0.0   \n",
       "6923                     Everbilt                             0.0   \n",
       "7209                   ClosetMaid       White White           2.0   \n",
       "7363                      Veranda                             0.0   \n",
       "7461                      Veranda                             0.0   \n",
       "7475                      nobrand                             0.0   \n",
       "...                           ...               ...           ...   \n",
       "220714             PartsmasterPro     Chrome Chrome           2.0   \n",
       "222464  Stanley-National Hardware                             0.0   \n",
       "223589                 Prime-Line                             0.0   \n",
       "223642              Barton Kramer                             0.0   \n",
       "223643              Barton Kramer                             0.0   \n",
       "223774            Stainless Glide   Stainless Steel           1.0   \n",
       "224771            Canine Hardware                             0.0   \n",
       "226311      Martha Stewart Living                             0.0   \n",
       "226361                 Prime-Line                             0.0   \n",
       "226826            Stainless Glide   Stainless Steel           1.0   \n",
       "226881                 Prime-Line                             0.0   \n",
       "227150           Young House Love                             0.0   \n",
       "228862  Continental Home Hardware                             0.0   \n",
       "229742  Stanley-National Hardware                             0.0   \n",
       "229999                 Prime-Line                             0.0   \n",
       "230047                    Kwikset                             0.0   \n",
       "230825                   PlayStar                             0.0   \n",
       "231360              Barton Kramer                             0.0   \n",
       "232910                 Prime-Line                             0.0   \n",
       "233124         Richelieu Hardware       Matte Black           1.0   \n",
       "233302           Hickory Hardware                             0.0   \n",
       "234873                    Liberty                             0.0   \n",
       "235233          National Hardware                             0.0   \n",
       "235587         Richelieu Hardware       White White           2.0   \n",
       "235701                    Liberty                             0.0   \n",
       "236106                    nobrand                             0.0   \n",
       "237028                   Everbilt                             0.0   \n",
       "237151           Hickory Hardware                             0.0   \n",
       "237739           Johnson Hardware                             0.0   \n",
       "239861                    Liberty                             0.0   \n",
       "\n",
       "                   color                               material  \\\n",
       "375                                                       Steel   \n",
       "1416                                                      Metal   \n",
       "1989                                      Stainless steel Metal   \n",
       "2180                                                      Metal   \n",
       "2295                                                      Metal   \n",
       "2337                                                      Metal   \n",
       "2339                                                      Metal   \n",
       "2706              Silver                                  Metal   \n",
       "2965                                                      Metal   \n",
       "2975                                                      Metal   \n",
       "2982              Silver                                  Metal   \n",
       "3300                                                Solid Brass   \n",
       "3421                                                      Steel   \n",
       "3422                                                      Steel   \n",
       "3423                                                      Steel   \n",
       "3775                                                      Metal   \n",
       "3792              Silver                                  Metal   \n",
       "3933               Green                                  Metal   \n",
       "4159         Black Black                                Plastic   \n",
       "4613                                                      Steel   \n",
       "4844                                                      Metal   \n",
       "5260                                                      Metal   \n",
       "6770                                                      Steel   \n",
       "6835                                                Solid Brass   \n",
       "6909                                           Galvanized Steel   \n",
       "6923                                                      Steel   \n",
       "7209         White White                                  Resin   \n",
       "7363                                                      Metal   \n",
       "7461                                                      Metal   \n",
       "7475                                                              \n",
       "...                  ...                                    ...   \n",
       "220714     Chrome Chrome                                  Metal   \n",
       "222464                                                    Steel   \n",
       "223589                                                  Plastic   \n",
       "223642                                                    Steel   \n",
       "223643                                                    Steel   \n",
       "223774   Stainless Steel                        Stainless Steel   \n",
       "224771                                                    Other   \n",
       "226311                                                    Metal   \n",
       "226361                                                    Steel   \n",
       "226826   Stainless Steel                        Stainless Steel   \n",
       "226881                                              Solid Brass   \n",
       "227150                                                    Metal   \n",
       "228862                                                    Metal   \n",
       "229742                                                    Steel   \n",
       "229999                                                    Other   \n",
       "230047                                                    Metal   \n",
       "230825                     Galvanized Steel Metal Plastic/Metal   \n",
       "231360                                                    Steel   \n",
       "232910                                                  Plastic   \n",
       "233124       Matte Black                                  Other   \n",
       "233302                                                    Metal   \n",
       "234873                                                    Metal   \n",
       "235233                                                    Steel   \n",
       "235587       White White                                  Metal   \n",
       "235701                                                  Plastic   \n",
       "236106                                                            \n",
       "237028                                                 Aluminum   \n",
       "237151                                                    Metal   \n",
       "237739                                                 Aluminum   \n",
       "239861                                                  Ceramic   \n",
       "\n",
       "        flag_commercial  flag_residential  \n",
       "375                -1.0              -1.0  \n",
       "1416               -1.0              -1.0  \n",
       "1989               -1.0              -1.0  \n",
       "2180               -1.0              -1.0  \n",
       "2295               -1.0              -1.0  \n",
       "2337               -1.0              -1.0  \n",
       "2339               -1.0              -1.0  \n",
       "2706               -1.0              -1.0  \n",
       "2965               -1.0              -1.0  \n",
       "2975               -1.0              -1.0  \n",
       "2982               -1.0              -1.0  \n",
       "3300               -1.0              -1.0  \n",
       "3421               -1.0              -1.0  \n",
       "3422               -1.0              -1.0  \n",
       "3423               -1.0              -1.0  \n",
       "3775               -1.0              -1.0  \n",
       "3792               -1.0              -1.0  \n",
       "3933               -1.0              -1.0  \n",
       "4159               -1.0              -1.0  \n",
       "4613               -1.0              -1.0  \n",
       "4844               -1.0              -1.0  \n",
       "5260               -1.0              -1.0  \n",
       "6770               -1.0              -1.0  \n",
       "6835               -1.0              -1.0  \n",
       "6909               -1.0              -1.0  \n",
       "6923               -1.0              -1.0  \n",
       "7209               -1.0              -1.0  \n",
       "7363               -1.0              -1.0  \n",
       "7461               -1.0              -1.0  \n",
       "7475               -1.0              -1.0  \n",
       "...                 ...               ...  \n",
       "220714             -1.0              -1.0  \n",
       "222464             -1.0              -1.0  \n",
       "223589             -1.0              -1.0  \n",
       "223642             -1.0              -1.0  \n",
       "223643             -1.0              -1.0  \n",
       "223774             -1.0              -1.0  \n",
       "224771             -1.0              -1.0  \n",
       "226311             -1.0              -1.0  \n",
       "226361             -1.0              -1.0  \n",
       "226826             -1.0              -1.0  \n",
       "226881             -1.0              -1.0  \n",
       "227150             -1.0              -1.0  \n",
       "228862             -1.0              -1.0  \n",
       "229742             -1.0              -1.0  \n",
       "229999             -1.0              -1.0  \n",
       "230047             -1.0              -1.0  \n",
       "230825             -1.0              -1.0  \n",
       "231360             -1.0              -1.0  \n",
       "232910             -1.0              -1.0  \n",
       "233124             -1.0              -1.0  \n",
       "233302             -1.0              -1.0  \n",
       "234873             -1.0              -1.0  \n",
       "235233             -1.0              -1.0  \n",
       "235587             -1.0              -1.0  \n",
       "235701             -1.0              -1.0  \n",
       "236106             -1.0              -1.0  \n",
       "237028             -1.0              -1.0  \n",
       "237151             -1.0              -1.0  \n",
       "237739             -1.0              -1.0  \n",
       "239861             -1.0              -1.0  \n",
       "\n",
       "[742 rows x 13 columns]"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filter_str(df, 'hardware')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "No     6939\n",
       "Yes    1481\n",
       "Name: value, dtype: int64"
      ]
     },
     "execution_count": 151,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filter_str(df_attr, 'energy star certified', 'name')['value'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Brands"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df_brand = df_attr[df_attr.name == 'MFG Brand Name'][['product_uid', 'value']].rename(columns={'value': 'brand'})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Bullets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "bullet = dict()\n",
    "bullet_count = dict()\n",
    "df_attr['about_bullet'] = df_attr['name'].str.lower().str.contains('bullet')\n",
    "for idx, row in df_attr[df_attr['about_bullet']].iterrows():\n",
    "    pid = row['product_uid']\n",
    "    value = row['value']\n",
    "    bullet.setdefault(pid, '')\n",
    "    bullet_count.setdefault(pid, 0)\n",
    "    bullet[pid] = bullet[pid] + ' ' + str(value)\n",
    "    bullet_count[pid] = bullet_count[pid] + 1\n",
    "df_bullet = pd.DataFrame.from_dict(bullet, orient='index').reset_index()\n",
    "df_bullet_count = pd.DataFrame.from_dict(bullet_count, orient='index').reset_index().astype(np.float)\n",
    "df_bullet.columns = ['product_uid', 'bullet']\n",
    "df_bullet_count.columns = ['product_uid', 'bullet_count']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Color"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "color = dict()\n",
    "df_attr['about_color'] = df_attr['name'].str.lower().str.contains('color')\n",
    "for idx, row in df_attr[df_attr['about_color']].iterrows():\n",
    "    pid = row['product_uid']\n",
    "    value = row['value']\n",
    "    color.setdefault(pid, '')\n",
    "    color[pid] = color[pid] + ' ' + str(value)\n",
    "df_color = pd.DataFrame.from_dict(color, orient='index').reset_index()\n",
    "df_color.columns = ['product_uid', 'color']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Material"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "material = dict()\n",
    "df_attr['about_material'] = df_attr['name'].str.lower().str.contains('material')\n",
    "for idx, row in df_attr[df_attr['about_material']].iterrows():\n",
    "    pid = row['product_uid']\n",
    "    value = row['value']\n",
    "    material.setdefault(pid, '')\n",
    "    material[pid] = material[pid] + ' ' + str(value)\n",
    "df_material = pd.DataFrame.from_dict(material, orient='index').reset_index()\n",
    "df_material.columns = ['product_uid', 'material']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Commercial / Residential Flag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "comres_index = df_attr['name'].str.lower().str.contains('commercial / residential')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Commercial / Residential    5011\n",
       "Residential                 4337\n",
       "Commercial                   182\n",
       "Name: value, dtype: int64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_attr[comres_index]['value'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "flag_comres = dict()\n",
    "df_attr['about_comres'] = df_attr['name'].str.lower().str.contains('commercial / residential')\n",
    "for idx, row in df_attr[df_attr['about_comres']].iterrows():\n",
    "    pid = row['product_uid']\n",
    "    value = row['value']\n",
    "    flag_comres.setdefault(pid, [0, 0])\n",
    "    if 'Commercial' in str(value):\n",
    "        flag_comres[pid][0] = 1\n",
    "    if 'Residential' in str(value):\n",
    "        flag_comres[pid][1] = 1\n",
    "df_comres = pd.DataFrame.from_dict(flag_comres, orient='index').reset_index().astype(np.float)\n",
    "df_comres.columns = ['product_uid', 'flag_commercial', 'flag_residential']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Indoor/Outdoor Flag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Indoor                      7527\n",
       "Indoor/Outdoor              3925\n",
       "Outdoor                     1204\n",
       "Indoor,Outdoor               256\n",
       "Indoor/Outdoor (Covered)      47\n",
       "Name: value, dtype: int64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filter_str(df_attr, 'indoor/outdoor', 'name')['value'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "flag_inoutdoor = dict()\n",
    "df_attr['about_intoutdoor'] = df_attr['name'].str.lower().str.contains('indoor/outdoor')\n",
    "for idx, row in df_attr[df_attr['about_intoutdoor']].iterrows():\n",
    "    pid = row['product_uid']\n",
    "    value = row['value']\n",
    "    flag_inoutdoor.setdefault(pid, [0, 0])\n",
    "    if 'Indoor' in str(value):\n",
    "        flag_inoutdoor[pid][0] = 1\n",
    "    if 'Outdoor' in str(value):\n",
    "        flag_inoutdoor[pid][1] = 1\n",
    "df_inoutdoor = pd.DataFrame.from_dict(flag_inoutdoor, orient='index').reset_index().astype(np.float)\n",
    "df_inoutdoor.columns = ['product_uid', 'flag_indoor', 'flag_outdoor']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    11755\n",
       "0     1203\n",
       "Name: flag_indoor, dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_inoutdoor['flag_indoor'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Energy Star"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "No     6939\n",
       "Yes    1481\n",
       "Name: value, dtype: int64"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filter_str(df_attr, 'energy star certified', 'name')['value'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "flag_estar = dict()\n",
    "df_attr['about_estar'] = df_attr['name'].str.lower().str.contains('energy star certified')\n",
    "for idx, row in df_attr[df_attr['about_estar']].iterrows():\n",
    "    pid = row['product_uid']\n",
    "    value = row['value']\n",
    "    flag_estar.setdefault(pid, 0)\n",
    "    if 'Yes' in str(value):\n",
    "        flag_estar[pid] = 1\n",
    "df_estar = pd.DataFrame.from_dict(flag_estar, orient='index').reset_index().astype(np.float)\n",
    "df_estar.columns = ['product_uid', 'flag_estar']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    6939\n",
       "1    1481\n",
       "Name: flag_estar, dtype: int64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_estar['flag_estar'].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Join (this rebuilds df, be sure to rerun subsequent operations.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df = pd.concat((df_train, df_test), axis=0, ignore_index=True)\n",
    "df = pd.merge(df, df_desp, how='left', on='product_uid')\n",
    "df = pd.merge(df, df_brand, how='left', on='product_uid')\n",
    "df = pd.merge(df, df_bullet, how='left', on='product_uid')\n",
    "df = pd.merge(df, df_bullet_count, how='left', on='product_uid')\n",
    "df = pd.merge(df, df_color, how='left', on='product_uid')\n",
    "df = pd.merge(df, df_material, how='left', on='product_uid')\n",
    "df = pd.merge(df, df_comres, how='left', on='product_uid')\n",
    "df = pd.merge(df, df_inoutdoor, how='left', on='product_uid')\n",
    "df = pd.merge(df, df_estar, how='left', on='product_uid')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Fill NAs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df['brand'].fillna('nobrand', inplace=True)\n",
    "df['bullet'].fillna('', inplace=True)\n",
    "df['bullet_count'].fillna(0, inplace=True)\n",
    "df['color'].fillna('', inplace=True)\n",
    "df['material'].fillna('', inplace=True)\n",
    "df['flag_commercial'].fillna(-1, inplace=True)\n",
    "df['flag_residential'].fillna(-1, inplace=True)\n",
    "df['flag_indoor'].fillna(-1, inplace=True)\n",
    "df['flag_outdoor'].fillna(-1, inplace=True)\n",
    "df['flag_estar'].fillna(-1, inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Relevance Distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x17060ba8>"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY8AAAEPCAYAAAC6Kkg/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFD9JREFUeJzt3V9QVPfdx/HPun9cCYkhGkqJna6CjrWkqdahTCYYHiVT\nMrWByXTSGqfJRWd60evnKjPP1TPTmz5XvepdZ5ppaY0hOsWKKA1KdBzJkEwjlNSqTDAGMIoi6C67\n7HkuqKcQ+XO+7Dm7IO/X1dnfnv3+fmfPj/2w5+yeDTmO4wgAAIM1hR4AAGDlITwAAGaEBwDAjPAA\nAJgRHgAAM8IDAGAWCbJ4X1+f+vv7lc1mVVNTo02bNmloaEgdHR0Kh8Oqq6tTWVmZJPnWDgDIAydA\np0+fdpePHTvmOI7jHD161G1777333GW/2gEAwQv0sNWePXseaisqKnKXY7GY7+0AgODl5ZxHe3u7\ndu/eLUlyZnyhfeaLvl/tAIDgBR4enZ2dqqysVGlpqSRpampqzvX8agcABC/QE+ZdXV0qLS3Vli1b\n3LZkMhno8lw6OjqMIwcASNK+ffvmbA8sPEZGRtTV1aXt27erv79f4+PjeuONN1RdXa3m5mY5jqP6\n+np3fb/a57Nr1y5/NxAAHnE9PT3z3hdynEf/qrodHR2EBwAY9fT0zPvOgy8JAgDMCA8AgBnhAQAw\nIzwAAGaEBwDAjPAAAJgF+iVBAECwsncnpYm0fwUfi2rN44tf8onwAICVbCKtzMkB38pFXkpIHsKD\nw1YAADPCAwBgRngAAMwIDwCAGeEBADAjPAAAZoQHAMCM8AAAmBEeAAAzwgMAYEZ4AADMCA8AgBnh\nAQAwIzwAAGaEBwDAjPAAAJgRHgAAM8IDAGBGeAAAzAgPAIAZ4QEAMCM8AABmhAcAwIzwAACYER4A\nADPCAwBgRngAAMwIDwCAGeEBADAjPAAAZoQHAMCM8AAAmBEeAACzSKEHAGD5G7k3qVupjG/1nlob\nUWlRzLd6yD/CA8CibqUy+r+PPvet3n/vfIbwWOE4bAUAMCM8AABmhAcAwIzwAACYBRoe2WxWU1NT\nQXYBACiAwD5t1dbWpqtXr6qxsVHl5eWSpNbWVjdMKioqVFVVJUkaGhpSR0eHwuGw6urqVFZWtqR2\nAEB+BBYeDQ0N6uvrm9UWj8dVX1//0LoXLlzQwYMHJUlHjhxRU1PTktoBAPmR1+95ZDIZtbS0SJIS\niYR27dolSSoqKnLXicX+89lvazsALCY1kVU66W/NaFxa+9jqOoWc1/BoaGhwl48fP+4uO47jLs8M\nA2s7ACwmnZQun8/6WrOiZo3WPuZryWWvYN8wj0aj7vJ8J9Wt7QspKSkxPwbAtOjdSX/rRSIF+5tM\n3rkjyd/tiUQiKilZ72tNr+58mZJ/F46Z3pb1HvZNXsNjYGBAiURCkpRKpdz2ZDLpy/JCRkdHzeMF\nMC2d8fPlabpeof4mMxl/33VM1yzc9mR93jdetyWw8Dh16pSuXLmidevWKZFIqLa2VoODg+ru7lYo\nFNLOnTvddaurq9Xc3CzHcWadULe2AwDyI7DwmOtFvba2ds51y8vLdeDAgZzbAQD5sbo+HgAA8AXh\nAQAwIzwAAGaEBwDAjPAAAJgRHgAAM8IDAGBGeAAAzAgPAIAZ4QEAMCM8AABmhAcAwIzwAACYER4A\nADPCAwBgRngAAMwIDwCAGeEBADAjPAAAZoQHAMCM8AAAmBEeAAAzwgMAYEZ4AADMCA8AgBnhAQAw\nIzwAAGaEBwDAjPAAAJgRHgAAM8IDAGBGeAAAzAgPAIBZpNADAB5Fw/fGdTN1z9eaG9YW6WtFxb7W\nBJaK8AACcDN1T7/66IyvNd/auYfwwLLBYSsAgNmSwmN4eNjvcQAAVhBP4dHX1zfr9rlz5wIZDABg\nZfAUHpcuXZp1OxwOBzIYAMDKsKTDVt
ls1u9xAABWkAU/bdXa2qr79+/r008/VTqdluM4CoVCKi0t\nzdf4AADL0ILhsX//fknSX/7yF/3oRz/Ky4AAAMufp8NWL7/8ctDjAACsIJ7CIxKZ/QZlfHw8kMEA\nAFYGT98wHxsbU2dnpzKZjCSpv79fb731VqADAwAsX57Co62tTa+88ori8bgk6cMPPwx0UACA5c3T\nYatYLOYGhyTt3r07sAEBAJY/T+ExPj7uHrKSHv7G+Xyy2aympqaWNjIAwLLl6bDVtWvX9Pvf/15P\nPPGEHMfRp59+qh07diz4mLa2Nl29elWNjY0qLy+XJA0NDamjo0PhcFh1dXUqKyvztR0AkB+ewuMn\nP/mJNm/e7N6+ePHioo9paGh46B3KhQsXdPDgQUnSkSNH1NTU5Gs7ACA/PB22mhkcklRVVbWkzoqK\nitzlWCzmezsAID88vfP46juI7u5uvfnmm+bOHMdxl2e+6PvVDgDID0/hce7cOdXW1spxHN26dUvp\ndHpJnc138tyv9oWUlJSYHwMsVfTuqP81I9GCzePo3Ul/60UiBduW5J07kvzdnkgkopKS9b7W9OrO\nlyllFl/Ns0gkovUe9o2n8Kivr1cikXBvDw4OLmlQyWQy0OWFjI76/8cMzCedWdo/WIvVLNQ8Tmf8\nfHmarleobclk/L8qeKaA25P1ed943RZP4TEzOCRvh4pOnTqlK1euaN26dUokEqqtrVV1dbWam5vl\nOI7q6+vddf1qBwDkh6fwOHz4sLvsOI7WrVu36GPmelEvLy/XgQMHAmsHAOSHp/AoLi5WQ0ND0GMB\nAKwQnj6qS3AAAGby9M5Dks6fP6/r16+rvLxcNTU1QY4JALDMeXrncerUKW3cuFGvvvqqNmzYoPb2\n9qDHBQBYxjyFx9jYmCorKyVJW7du5cegAGCVW9IvCYbD4UAGAwBYGTyFx+TkpCYmJiRNX559qd8w\nBwA8GjydMN+/f79aW1vlOI7C4bD2798f9LgAAMuYp/CIx+P68Y9/HPRYAAArhKfDVqOjo7p3756k\n6R9i8no9KQDAo8lTePzhD3/QpUuXJE1/2/zEiROBDgoAsLx5Co9vfOMbeu655yRNhwcnzAFgdfMU\nHqFQaNbtaDQayGAAACuDp/DIZDLur/dls1ndv38/0EEBAJY3T5+22rt3r5qbmxWLxZRMJvXyyy8H\nPS4AwDLmKTyefPJJvf7660GPBQCwQng6bAUAwEyEBwDAjPAAAJgRHgAAM8IDAGBGeAAAzAgPAIAZ\n4QEAMCM8AABmhAcAwIzwAACYER4AADPCAwBgRngAAMwIDwCAGeEBADDz9GNQQD7cvjesseRN3+o9\nEd+gJ4u+5ls9AP9BeGDZGEve1KGe//Wt3mu7/ofwAALCYSsAgBnhAQAwIzwAAGaEBwDAjPAAAJgR\nHgAAM8IDAGBGeAAAzAgPAIAZ4QEAMCM8AABmhAcAwIzwAACYER4AALO8X5K9tbVVU1NTkqSKigpV\nVVVJkoaGhtTR0aFwOKy6ujqVlZUtqR0AELy8h0c8Hld9ff1D7RcuXNDBgwclSUeOHFFTU9OS2gEA\nwct7eGQyGbW0tEiSEomEdu3aJUkqKipy14nFYu6ytR0AELy8h0dDQ4O7fPz4cXfZcRx3eWYYWNsB\nAMEr6M/QRqNRd/nBeZCvsrbPp6SkxLQ+8u+Lu/5Ox2g0UrD9Hr076n/NSLSA2zPpb71I4fZN8s4d\nSf5uTyQSUUnJel9renXny5QyPtaLRCJa72Hf5D08BgYGlEgkJEmpVMptTyaTvizPZ3TU/z9m+Cud\n9vNPYLpeofZ7OpMOpGbhtsfnfZMp3L7JZLIB1Czc9mR93jdetyXv4TE4OKju7m6FQiHt3LnTba+u\nrlZzc7Mcx5l1Qt3aDgAIXt7Do7a2ds728vJyHThwIOd2AEDw+JIgAMCM8AAAmBEeAAAzwgMAYEZ4\nAA
DMCA8AgBnhAQAwIzwAAGaEBwDAjPAAAJgRHgAAM8IDAGBGeAAAzAgPAIAZ4QEAMCM8AABmhAcA\nwIzwAACY5f1naOGf1PiwJu/f9LVmbN0GrS3+mq81ATx6CI8VbPL+TV0+8ytfa1bseYvwALAoDlsB\nAMwIDwCAGeEBADAjPAAAZoQHAMCM8AAAmBEeAAAzwgMAYLbqviSYvTsh3Uv6V7AorjWPP+ZfPQBY\nAVZdeOheUtn3z/tWbs1/1UiEB4BVhsNWAAAzwgMAYEZ4AADMCA8AgBnhAQAwIzwAAGaEBwDAjPAA\nAJgRHgAAM8IDAGBGeAAAzAgPAIAZ4QEAMCM8AABmhAcAwIzwAACYER4AALMV+0uCQ0ND6ujoUDgc\nVl1dncrKygo9JABYNVbsO48LFy7o4MGD+ulPf6rz5/37WVkAwOJWbHgUFRW5y7FYrIAjAYDVZ8WG\nh+M47jLhAQD5tWLDY2pqqtBDAIBVK+TM/Bd+BTly5IiampokSa2trdq/f/+863Z0dORrWADwSNm3\nb9+c7Ss2PK5fv67Tp0/LcRzV19ertLS00EMCgFVjxYYHAKBwVuw5DwBA4RAeAACzVR0e2Ww2sE9t\nBVm7EP3ko79H9TkDHkUr9vIkuWpra9PVq1fV2Nio8vLyBde1XgrFUruvr0/9/f3KZrOqqanRpk2b\nJE1/guzBC1xFRYWqqqpy6ufcuXMaHh5WNBrVt771LVVUVCzYnut2eRn/UmtL0sDAgD7++GOFw2E1\nNDQoGo3q17/+tbZt2ybHcXTz5k39/Oc/z6mf+epZt22+fTwX61yz1M5lrln68WOuWfu07BNLXWnp\nc83Sj19zzfIc53yJJ2cV6+3tdT7//PNF1zt69Ki7/N577/la+/Tp0+7ysWPH3OWTJ0/62s9MM/vx\n0r6U/ryOfym1b9++7fT29j7Ufv/+fXe5ra0t537mq2fdtvn28Vysc81SO5e5ZulnplzmmqVPyz6x\n1M1lrln68WuuzeT3XPuqVfvOwyLIS6Hs2bNnzvZMJqOWlhZJUiKR0K5du3Lua2BgQIcOHVJlZaWn\n9lwEMf4Hent7FY/H1dLSoqqqKm3btk2SFI/HJUk3btzQ008/nXM/89Wzbtt8+3gu1rlmqZ3LXLP0\nI/kz1yx9WvaJpW4uc83Sj19zTfL+HOf8umaOm0eI1/9A29vb3eWg3hGcOHHCGR4envO+48eP+9ZP\nOp12bt686bk91/4cZ+HxL6X2H//4R6ejo8NxHMdpbW11pqamZt3//vvvO8lkMud+vNTzum2Os/A+\nfmApc81rbS/rLrY9ln5ynWtL6dNxvO8TL3VznWte+/FSzzLXvDzHS51rD6zqE+ZeBX1ytbOzU5WV\nlfN+0TES8e8NYiQS0cWLFz23+9Wn3/Xq6uokSc8884xGR0dn3Z9MJrV27Vrf+luontdtW2wfP7CU\nuea1tpd1F9oeSz8PauU616x9PqjvV91c55p1/H7MtQfrLvYc5/q6Rnh8RW9vr/r6+ma1JZPJOZf9\nqN3V1aXS0lJt2bJlVvvAwIC7nEqlcu7n1q1b7vL169cXbc+1v1zGv1jtrVu36sqVK5Kk0dFRrV+/\n3r0vm81qzRr7tJ6rn/nqWbdtvn3sx1yz1M5lrln68WuuWfq07BNL3VzmmqWf+epZ59p8z3EQr2ur\n9pzHqVOndOXKFa1bt06JREK1tbWSpj+tEAqFtGPHDnfd6upqNTc3u5dC8av2yMiIurq6tH37dvX3\n92t8fFxvvPGGJGlwcFDd3d0KhULauXNnzttw9uxZpVIpZTIZfec731m0Pdf+vIx/qbW/+93v6vDh\nw7p48aI2btw46z+yzz77TFu3bvWln/nqWbZtoX2c61yz1M5lrlm3wY+5Zu3T6z6x1l3qXLP2M189\n69/RfM+xH69rX8XlSQAAZhy2AgCYER4AADPCAwBgRngAAMwIDwCA
GeEBADBbtd/zAPxw+vRpXbt2\nTZs2bdKLL75Y6OEAeUN4ADl48cUX9eWXX6q3t7fQQwHyivAAJH344Yc6fvy4fvazn+ncuXMqKirS\njh07tHHjRrW3t6uoqEiZTEbl5eWqqamZ9dj5vmd75swZ3bhxQ9FoVKlUSg0NDXr88cf1ySefqKWl\nRfv27dMLL7zg3t67d69qa2s1MjKizs5OxWIxpdNpbd++Xc8++6wkqbu7W+3t7dqyZYuKi4uVTqf1\n6quvun3+61//0t///ndFIhFls1ldv35dv/zlLyVJ//znP/Xxxx8rHo8rlUpp9+7d2rx5c0DPKB55\n5kspAo+oQ4cOPfTbDL/73e+cdDrt3j579qwzMDAwa52RkRGns7NzVltfX5/zwQcfuLcnJyedd955\nx7195swZ59q1a+5977777rzj+vOf/zzr9m9/+1t3+aOPPnIuXbrkOM70b0IcPnx41rpnz551HMdx\nJiYmnLfffnvWfe+8886sbQMseOcB/FsoFNIPfvCDWW1DQ0M6duyYezubzSoajeqb3/zmgrUuX76s\nVCqlo0ePSpp+dzIxMeHe//zzz+vdd9/Va6+9prNnz+qFF15w77tx44a6uroUDoe1Zs0a3b59e1bt\nDRs2uMvxeFyTk5OSpi+E99WL8D3//POSpOHhYY2Njc0aTzKZ1K1bt0xXrAUeIDyAf3PmOPy0bds2\n/fCHP5x1QbxMJrPoY5999ll98cUXsw5xzXxcOBxWcXGxxsbGdOPGjVkv4CdOnNDrr7/uXmH1T3/6\n06LjlKSvf/3r6unpmXUBvcHBQT399NPatGmTysrK1NjY6N6XzWbnrAN4wYURAUl//etf1dPT455b\neO6555RIJDQxMaGTJ08qHA7LcRzdu3dP3//+97V582YNDQ3p/PnzmpiY0MjIiLZs2aLKykp9+9vf\nliT19PTos88+05o1azQ1NaVQKKSmpia3z4mJCf3mN79RY2PjrKud/u1vf9OdO3cUCoWUTqd1+fJl\nvfTSS/re976nzs5OffDBB/rFL36h0tJSvf3228pms3rzzTclSf/4xz/0ySefKBaLKZPJ6KmnntLe\nvXsl/ecKrQ+CcHx8XK+88oqKi4vz8hzj0UJ4AADM+JIgAMCM8AAAmBEeAAAzwgMAYEZ4AADMCA8A\ngBnhAQAwIzwAAGb/D4P+N1U3LZf/AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xd8e1518>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.countplot(x='relevance', data=df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['majority_relevance'] = df['relevance'].map(lambda x: x in [1.0, 1.33, 1.67, 2.0, 2.33, 2.67, 3.0])\n",
    "def majoritize(df):\n",
    "    return df[df['majority_relevance'] == 1]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## External Data Utilization"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Fix Typos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['search_term'] = df['search_term'].map(correct_typo)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pre-Stemming Attributes Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['match_commercial'] = (df['search_term'].str.lower().str.contains('commercial') & df['flag_commercial']).astype(np.float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "350.0"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(df['match_commercial'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['match_residential'] = (df['search_term'].str.lower().str.contains('residential') & df['flag_residential']).astype(np.float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "56.0"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(df['match_residential'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def filter_estar(df):\n",
    "    return df['search_term'].str.lower().str.contains('energy star') |\\\n",
    "    df['search_term'].str.lower().str.contains('energy efficient')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['match_estar'] = (filter_estar(df) & df['flag_residential']).astype(np.float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "60.0"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(df['match_estar'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['match_indoor'] = (df['search_term'].str.lower().str.contains('indoor') & df['flag_indoor']).astype(np.float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "591.0"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(df['match_indoor'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['match_outdoor'] = (df['search_term'].str.lower().str.contains('outdoor') & df['flag_outdoor']).astype(np.float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3509.0"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(df['match_outdoor'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    240760.000000\n",
       "mean          0.014575\n",
       "std           0.119843\n",
       "min           0.000000\n",
       "25%           0.000000\n",
       "50%           0.000000\n",
       "75%           0.000000\n",
       "max           1.000000\n",
       "Name: match_outdoor, dtype: float64"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['match_outdoor'].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Stemming & Tokenizing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df['search_term'] = df['search_term'].map(lambda x: str_stem(x))\n",
    "df['product_title'] = df['product_title'].map(lambda x: str_stem(x))\n",
    "df['product_description'] = df['product_description'].map(lambda x: str_stem(x))\n",
    "df['brand'] = df['brand'].map(lambda x: str_stem(x))\n",
    "df['bullet'] = df['bullet'].map(lambda x: str_stem(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['color'] = df['color'].map(lambda x: str_stem(x))\n",
    "df['material'] = df['material'].map(lambda x: str_stem(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df['tokens_search_term'] = df['search_term'].map(lambda x: x.split())\n",
    "df['tokens_product_title'] = df['product_title'].map(lambda x: x.split())\n",
    "df['tokens_product_description'] = df['product_description'].map(lambda x: x.split())\n",
    "df['tokens_brand'] = df['brand'].map(lambda x: x.split())\n",
    "df['tokens_bullet'] = df['bullet'].map(lambda x: x.split())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# slow, not obvious improvment\n",
    "# df['tokens_search_term'] = df['search_term'].map(lambda x: word_tokenize(x))\n",
    "# df['tokens_product_title'] = df['product_title'].map(lambda x: word_tokenize(x))\n",
    "# df['tokens_product_description'] = df['product_description'].map(lambda x: word_tokenize(x))\n",
    "# df['tokens_brand'] = df['brand'].map(lambda x: word_tokenize(x))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Meta-Features"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Token count of each text field: the length of its 'tokens_<column>' list.\n",
    "for text_col in ['search_term', 'product_title', 'product_description', 'brand', 'bullet']:\n",
    "    df['len_' + text_col] = df['tokens_' + text_col].map(len)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Post-Stemming Attribute Features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def match_color(st, colors):\n",
    "    for w in st:\n",
    "        if w in colors:\n",
    "            return True\n",
    "    return False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 1.0 where match_color reports a hit between the row's search tokens and its color value, else 0.0.\n",
    "# Cast with the builtin float: the np.float alias was deprecated in NumPy 1.20 and removed in 1.24.\n",
    "df['match_color'] = df.apply(lambda row: match_color(row['tokens_search_term'], row['color']), axis=1).astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19251.0"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows flagged by match_color (the column holds 0.0 / 1.0).\n",
    "df['match_color'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def match_material(st, materials):\n",
    "    for w in st:\n",
    "        if w in materials:\n",
    "            return True\n",
    "    return False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 1.0 where match_material reports a hit between the row's search tokens and its material value, else 0.0.\n",
    "# Cast with the builtin float: the np.float alias was deprecated in NumPy 1.20 and removed in 1.24.\n",
    "df['match_material'] = df.apply(lambda row: match_material(row['tokens_search_term'], row['material']), axis=1).astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13907.0"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows flagged by match_material (the column holds 0.0 / 1.0).\n",
    "df['match_material'].sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Flag & Count & Ratio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Flag (1/0): does the whole search string occur verbatim inside each product text field?\n",
    "for suffix, text_col in [('pt', 'product_title'), ('pd', 'product_description'),\n",
    "                         ('br', 'brand'), ('bl', 'bullet')]:\n",
    "    df['flag_st_in_' + suffix] = df.apply(lambda row: int(row['search_term'] in row[text_col]), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Count the distinct search-term tokens that also appear among each field's tokens.\n",
    "for suffix, text_col in [('pt', 'product_title'), ('pd', 'product_description'),\n",
    "                         ('br', 'brand'), ('bl', 'bullet')]:\n",
    "    df['num_st_in_' + suffix] = df.apply(\n",
    "        lambda row: len(set(row['tokens_search_term']) & set(row['tokens_' + text_col])), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Overlap ratio per field: distinct-overlap count divided by the search term's token count.\n",
    "for suffix in ['pt', 'pd', 'br', 'bl']:\n",
    "    df['ratio_st_in_' + suffix] = df.apply(\n",
    "        lambda row: row['num_st_in_' + suffix] / float(row['len_search_term']), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Switch seaborn's default color palette to 'husl' for the plots that follow.\n",
    "sns.set_palette(\"husl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1a154240>"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEPCAYAAACgFqixAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAF6JJREFUeJzt3VtsFOfBxvFnvbYBiwI2xjjGah0wyHWgoQRcQjm4wSFu\nmmKLJogtIUQq0Ip830V7U6ntbVWpuai+pEFJGqlpSbtpONSkMRCCE3NykSGUAGsODcZJgS7GeG3L\nhvVp57tAbLz4sLNkZmcX/39SpJnxu7NPlpEfz8zuuy7DMAwBAEa1FKcDAACcRxkAACgDAABlAAAQ\nZQAAUBzKIBQKqb+/3+6nAQB8Cal27nzv3r26dOmSKioqlJeXN+JYv9+vmpoaud1ulZaWKjc3185o\nAIABXHZ/zqChoUGTJk2KWgbvvvuuVq5cKUmqqqpSZWWlnbEAAAMkzD2DjIyM8HJ6erqDSQBg9EmY\nMhh4gkIZAEB8JUwZcJMZAJxj6w3k4fh8PrlcLhUXF4e3BYPBIZeHU1NTY0s2ALjfLV++fNA2W8tg\n//79amxs1Lhx41RQUKAlS5ZIkurq6gaVQUlJibxerwzDUFlZman9z5s3z5bckuTxeOT1em3bv93I\n75xkzi45k9/j8cizaWtcn9MM7+vrTL0WHo9Hby5/LQ6JYvN8zY8H5T9x4sSQY20tg+F+qW/cuHHQ\ntry8PHk8HjvjAACGkTD3DAAAzqEMAACUAQCAMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIA\nAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAA\niDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAASEq1c+d+v181\nNTVyu90qLS1Vbm7usGObmpp0/PhxhUIhLVy4UF/96lftjAYAGMDWMqivr9fatWslSVVVVaqsrBx2\n7NmzZ/X0009LkqqrqykDAIgjW8sgIyMjvJyenj7i2P7+foVCIUnS2LFj7YwFALiLrWVgGEZ4OVoZ\nzJ49Wy+++KJcLpfWrFljZywAwF1sLYP+/n7TYz/55BP9/Oc/l2EY2rFjh6nLRJmZmV8mnuP7txv5\nnRPv7J5nn1WgpcW6/Xk8luwnMztb3rfesmRfTknm41Ayn9/WMggGg0Mu+3w+uVwuFRcXh7eNGTNG\nkuRyuTRhwgRT+w8EAhYldWb/diO/c+KdPdDSojEbfhrX5zQj8MbvkvrfUUru41Ayn9/WMigpKZHX\n65VhGCorKwtvr6urG1QGBQUF2rlzpyRpzpw5dsYCANzF1jLIy8sb8nRz48aNg7YVFRWpqKjIzjgA\ngGHwoTMAAGUAAKAMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwA\nAKIMAAD6EmVw+vRpK3MAABxkugwOHjwYsX7p0iXLwwAAnGG6DFpbW+3MAQBwkOkycLlcEevd3d2W\nhwEAOCPqdyC/9957unXrls6fP6/e3l4ZhiGXy6WHH344HvkAAHEQtQyeeuopSbfvGSxdutT2QACA\n+DN9mYgiAID7V9Qzg4EaGhrU2NioWbNmadasWXZlAgDEmekzg/3796urq0srVqxQe3u7amtrbYwF\nAIgn02UQCAS0YMECpaena8GCBbzVFADuI6bLID09fcjtHR0dloUBADjD9D2Drq4utbS0KDs7Wzdu\n3JBhGJKk2tparVy50raAo8n/vPAT3Whtt2x/Ho/Hkv1Mzpqo37/y6ohjNr/wEwUszC5Zlz8za6K2\nRMn/kxc2q701YMnzSdZln5iVqVdf2WJqbPcbv7PkOZ3ifX2d0xFGNdNl4Pf79cYbb2jatGm6cuWK\nCgoKtG3bNp0/f54ysMiN1nb98gcx3d
OPi1/viP5LPtDaru88b8QhTew+ejN6/vbWgNybyuOQJjbt\nr+81PXbMhp/amOTexFJQnk1bbUxyb0ZTQZn+zbN06VLNnz9/0Pbjx49bGggAEH+m7xkMVQR3tu/f\nv9+yQACA+LPk+ww6Ozut2A0AwCGWlMHdk9gBAJIL33QGAKAMAACUAQBAlAEAQDHOWjqc5cuXD7nd\n7/erpqZGbrdbpaWlys3NHXE/TU1NOnnypNxut8rLy5WWlmZFPABAFDGVwaeffqqenh5J0rFjx7R+\n/XpJ0vjx44ccX19fr7Vr10qSqqqqVFlZOey+29vbdfPmzRHHAADsYfoy0c6dO9XZ2amUlJTwf9Fk\nZGSEl4eb6O4On8+nYDConTt36sKFC2ZjAQAsYPrMIC0tTXPnzg2vFxUVRX3MncnspOhl8Nlnn2nq\n1KlatWqVqqurVVhYaKpwAABfnunftnd/yri5uTnqY/r7+00HSU1NVWlpqSRp2rRpCgSsm0ESADAy\n02cGjY2Neu211zR58mQZhqHz58/rV7/61YiPCQaDQy77fD65XC4VFxeHt82cOVONjY0qLCxUIBDQ\n7Nmzo2bKzMw0G/+e2L3/ZJLsr0Uy50/m7NLoyJ8zeYqer/lxHNLEJmfyFNOvv+kyWLFihRYsWBBe\nN/O1lyUlJfJ6vTIMQ2VlZeHtdXV1g8pg7ty52r59u86cOaPs7GylpkaPZvfZA2cnX0j21yKZ8ydz\ndml05P+/379k2fN5PB55vV7L9mf29TddBgOLQFL4ks5I8vLyhvySj40bNw45/umnnzYbBwBgoXu+\nQ3vt2jUrcwAAHHTPZXD06FErcwAAHBT1MlFLS4syMzMjrjsZhqGuri5bgwEA4idqGbzzzjt65pln\n9NZbb2nevHnhzw5wmQgA7h9Ry2Dz5s2SpBkzZmjZsmXh7W1tbfalAgDElel7BuXl5RHrhYWFlocB\nADjDdBncPZ3EwA+FDfxAGQAg+Vgy+c++ffus2A0AwCGWlEEoFLJiNwAAh1hSBi6Xy4rdAAAcwhzR\nAADKAABAGQAAZFEZjBkzxordAAAccs9l0NfXF16++wNpAIDkYroM7v6ay3fffdfyMAAAZ5gug3/+\n858R62lpaZaHAQA4454vE92ZvRQAkPyizlpaV1ennp4eNTY26sCBA5Ju3y/o7e21PRwAID6ilkFR\nUZH6+/t15coVPfTQQzIMQ263W1lZWfHIF5P/3fyCWgKtlu1vqO9vvhfZmVl6ecsrpsb+ekdf9EEJ\n6qM3k/uT6P2v73U6wqiVlZUt7+vrnI4xSFZWttMR4iZqGdz5pf/4448rOzuxX5iWQKu2VjzrdIxB\n1u16y/TYX/4g6j9J3JktqO88n5iXDs2WlHtT4r0rbrQU1CuvvGzZvjwej7xer2X7Gy1M3zPIycmx\nMwcAwEGmy+DixYvq7OxUZ2en9uzZo46ODjtzAQDiyHQZfPTRR+rp6dEHH3ygRx99VNXV1XbmAgDE\nkekymDp1qrKysjRu3DhNmjRJGRkZduYCAMSR6TLo7e1Vf3+/UlNv3+DkcwYAcP8wXQYpKSn685//\nrLlz56qlpUWdnZ125gIAxJHp9zFWVlYqFAopJSVFly9fVnFxsZ25AABxFNN0FCkpt4fn5+dr3rx5\n4e2XL1+2NhUAIK4s+T6Djz/+2IrdAAAcYkkZcDMZAJKbJWXgciX3nDQAMNrxHcgAAMoAAMA9AwCA\nLCqD/Px8K3YDAHBITGXQ0tKi+vp6tbS0RGyfP3++paEAAPFlugxOnTql48ePKy8vT/X19Tp16lTU\nx/j9fv3lL3/R22+/Lb/fH3V8MBjUb37zG129etVsLACABUyXwYULF1ReXq78/Hw9+eSTOn/+fNTH\n1NfXa+3atVqzZo2OHj0adXxtba2eeOIJs5EAABYxXQZpaWkR6+np6VEfM3Ca62jjA4GAMjIyNHbs\nWL
ORAAAWMV0Gt27dGnF9KAPfZRStDA4fPqzFixebjQMAsJDpWUtLS0v1pz/9SVOnTlVzc7NWrFgR\n9TH9/f2mg1y7dk3/+Mc/dO3aNRUUFCgvL8/0YwEAX47pMsjNzdX69evV2dmp8ePHm3pMMBgcctnn\n88nlckVMg71hwwZJUkNDgyZNmmRq/5mZmabGJYJkyjoU8jvHTPbM7GwF3vhdHNLEJjM725HXPpn/\nvSVn8psugzvMFoEklZSUyOv1yjAMlZWVhbfX1dUNKgNJam1t1eHDh02fGQQCAfPBHZZMWYdCfueY\nyb7l5Zctez6PxyOv12vZ/px47ZP531tyJn/MZXDHgQMHtGzZshHH5OXlyePxDNq+cePGIcdnZWVp\n06ZN9xoJAHCP7vkTyG1tbVbmAAA4KOqZwZYtW7R69Wrt3r07/FZRwzB0/vx5VVRU2B4QAGC/qGXw\nzDPPaNKkSZowYYIqKyvD26uqqmwNBgCIn6hlMGXKFElSYWFhxPY734cMAEh+pn+jz549O2K9vLzc\n8jAAAGfc85/3ZqajAAAkB9Nl0NfXF7H+4YcfWh4GAOAM02WwZ8+eiPXOzk7LwwAAnGG6DGKZZwgA\nkFyivpuora1Nvb29unnzZvgbzvr6+jgzAID7SNQy8Pl86u3tld/vl8/nk2EYSk1N1fe///145AMA\nxEHUMvj2t78tScrJyRk0sVwiWrfrLacjIAlNzMpU++t7nY4xyMSs5J59E8nD9ER1yVAEkrS14lmn\nIwxCQSW+V1/ZYtm+rJ71E4iHe/6cwccff2xlDgCAg0yfGZw+fVpnz55VW1ubJkyYoKamJj3yyCN2\nZgMAxInpM4Nz585p9erVeuCBB7RmzRrNmDHDzlwAgDgyXQZpaWmSpFAoFLEOAEh+psugp6dH0u0P\nn4VCIRmGYVsoAEB8mS6D0tJSSdKSJUv017/+VRMnTrQrEwAgzkzfQG5ra1NOTo6mTJmiZ59NvLdv\nAgDunekzg4aGhoj1pqYmq7MAABxiugxu3rwZsf7JJ59YHgYA4AzTZbBo0SK9//77dmYBADjEdBls\n3bpVHR0d2r59u7Zt26bTp0/bmQsAEEembyB/97vf1fz588Pru3fvtiUQACD+TJ8ZDCwCSXryySct\nDwMAcMY9T1QHALh/UAYAAMoAAEAZAAAUw7uJYL/JWRP16x3tTscYZHJW9HmoMrMm6qM3Ey+7dDsb\ngJFRBgnk96+8atm+4v3Vi1sszC7x1ZFAvHGZCABAGQAAKAMAgCgDAIAoAwCAbH43kd/vV01Njdxu\nt0pLS5Wbmzvs2IaGBp07d06hUEgLFy5Ufn6+ndEAAAPYWgb19fVau3atJKmqqkqVlZXDjm1padGq\nVask3Z4RlTIAgPix9TJRRkZGeDk9PX3EsUuXLrUzCgBgBLaWgWEY4eVoZXDHvn37Bk2XDQCwl62X\nifr7+2MaX1tbq8LCQuXk5Jgan5mZeS+xHOFE1mR6fYaSzPmTObtEfqc5kd/WMggGg0Mu+3w+uVwu\nFRcXh7cdOnRIOTk5mj59uun9BwIBa4LGgRNZk+n1GUoy50/m7BL5neZEflvLoKSkRF6vV4ZhqKys\nLLy9rq4uogyam5t16NAhFRUV6dy5c+rs7NRzzz1nZzQAwAC2lkFeXp48Hs+g7Rs3boxYz8nJ0S9+\n8Qs7owAARsCHzgAAlAEAgDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgy\nAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIA\nAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAEhKtXPnfr9fNTU1crvdKi0tVW5u\nriVjAQDWsvXMoL6+XmvXrtWaNWt09OhRy8YCAKxlaxlkZGSEl9PT
0y0bCwCwlq1lYBhGeDnaL/hY\nxgIArGVrGfT399syFgBgLZcx8E9yi1VVVamyslKS9N577+mpp56SJPl8PrlcLhUXF0cdO5yamhqb\nUgPA/W358uWDttlaBlevXtWBAwdkGIbKysqUk5MjSfrDH/4gl8ulDRs2RB0LALCfrWUAAEgOfOgM\nAEAZAABGeRmEQqGkexdTMmYeKNnzA/crW6ejSGR79+7VpUuXVFFRoby8vBHHJspUGbFkrqur07Vr\n15SWlqavf/3rmjFjxojb4yGW/JLU1NSkkydPyu12q7y8XGlpaXrxxRc1a9YsGYahGzdu6Ec/+lEc\nkg/W0NCgc+fOKRQKaeHChcrPzx92bKIcP7FkTsTjJ5b8UmIfP7G8jnE7foxRzOfzGVeuXIk6bteu\nXeHlv//973ZGisps5oGqq6tj2m4ns/nb2toMn883aPutW7fCy3v37rU0WywOHDgQXo72OibK8RNL\n5oES5fiJJX+iHz8DJcrxM6ovE5mVrFNlNDU16be//a2CwaCp7YnE5/MpGAxq586dunDhQnj72LFj\nJUnXr1/XlClTnIqnpUuXmh6bKMdPLJmlxDt+Ysmf6MePZP51jNfxM2ovE8XCSNKpMgoKCvSzn/1M\nHR0dprYnks8++0xTp07VqlWrVF1drcLCQqWkfPG3i8/n06OPPupgwtv27dun+fPnjzgm0Y4fM5ml\nxD1+zORPhuPH7OsYr+OHMwMTkvmGZ2pqqs6cOWN6e6JITU1VaWmpJGnatGkKBAIRPw8GgxozZowD\nyb5QW1urwsLCqB+QTKTjx2zmOxLt+DGbPxmOH8nc6xiv44cyuIvP51NDQ0PEtoGncYl4aWWozK2t\nreHlq1evRt3upKHyz5w5U42NjZKkQCCgiRMnhn8WCoUi/spzwqFDh5STk6Pp06dHbE/k4yeWzIl4\n/MSSP9GPn+FeRyePn1F7mWj//v1qbGzUuHHjVFBQoCVLlki6fZf/7nmTSkpK5PV6w1NlOCWWzEeO\nHFF3d7f6+vr0jW98I+r2RMs/d+5cbd++XWfOnFF2drZSU784VD///HPNnDkzrtkHam5u1qFDh1RU\nVKRz586ps7NTzz33nKTEPX5izZxox0+s+RP5+JGGfx2dPH6YjgIAwGUiAABlAAAQZQAAEGUAABBl\nAAAQZQAA0Cj+nAEwnAMHDujy5cvKz8/XsmXLnI4DxAVlANxl2bJlamlpkc/nczoKEDeUAe5bx48f\n1549e7Ru3TrV1dUpIyNDxcXFys7O1r59+5SRkaG+vj7l5eVp4cKFEY8d7rOYBw8e1PXr15WWlqbu\n7m6Vl5frK1/5ik6fPq2dO3dq+fLlWrx4cXj9scce05IlS9Tc3Kza2lqlp6ert7dXRUVFmjNnjiTp\n2LFj2rdvn6ZPn67x48ert7dXq1atCj/np59+qlOnTik1NVWhUEhXr17V5s2bJUkXLlzQyZMnNXbs\nWHV3d2v+/Pl68MEHbXpFcV+zbXJsIAG88847g+at/+Mf/2j09vaG148cOWI0NTVFjGlubjZqa2sj\ntjU0NBiHDx8Or/f09Bjbtm0Lrx88eNC4fPly+Gc7duwYNtff/va3iPVXX301vPyvf/3L+Pe//20Y\nxu3597dv3x4x9siRI4ZhGEZXV5exdevWiJ9t27Yt4v8NMIszA9zXXC6XnnjiiYhtfr9f1dXV4fVQ\nKKS0tDR97WtfG3FfFy9eVHd3t3bt2iXp9tlDV1dX+OeLFi3Sjh07tHr1ah05ckSLFy8O/+z69es6\ndOiQ3G63UlJS1NbWFrHvyZMnh5fHjh2rnp4eSbcnMbt7YrZFixZJkq5du6aOjo6IPMFgUK2traZn\nJQXuoAxwXzOGuNwza9Ysfe9734uYvKyvry/qY+fMmaP//ve/EZeUBj7O7XZr/Pjx6ujo0PXr1yN+\nIb///vv64Q9/GJ4t8+23346a
U5IeeOABnThxQt/85jfD2/7zn/9oypQpys/PV25urioqKsI/C4VC\nQ+4HiIaJ6nDf2r17t06cOBG+Nv/www+roKBAXV1d+uCDD+R2u2UYhm7evKlvfetbevDBB+X3+3X0\n6FF1dXWpublZ06dPV2FhoR566CFJ0okTJ/T5558rJSVF/f39crlcqqysDD9nV1eXXnrpJVVUVETM\nPPnhhx+qvb1dLpdLvb29unjxoh5//HE98sgjqq2t1eHDh7Vp0ybl5ORo69atCoVCWr9+vSTp7Nmz\nOn36tNLT09XX16esrCw99thjkm4Xw7Fjx8LF1tnZqZUrV2r8+PFxeY1x/6AMAAB86AwAQBkAAEQZ\nAABEGQAARBkAAEQZAABEGQAARBkAACT9P5YP/hym4/A8AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a14c668>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of the product-title overlap ratio (ratio_st_in_pt) for each relevance value.\n",
    "# NOTE(review): majoritize is not defined in this part of the notebook (presumably from util3) - confirm what it does to df before plotting.\n",
    "sns.boxplot(x='relevance', y='ratio_st_in_pt', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1a46ffd0>"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEPCAYAAACgFqixAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAF+9JREFUeJzt3X1QFPfhx/HPcYDKUBVEJMi0RNGhRFtjlBrrA41EqW2F\nsYkjJdF0qrZjf/+0/3Sm7b+dzjR/ZKZpnCTNTNNqSupTSVrUGEkwKnXQWJ8OHxoJSdWeiBw4hzmB\nu/394Xjh5OH2zO7tnbxfM87sftlbPh47fLjd2++5DMMwBAAY1VKcDgAAcB5lAACgDAAAlAEAQJQB\nAEBxKINQKKRgMGj3twEAfAGpdu583759+vjjj1VZWan8/PwRt/V6vWpoaJDb7VZZWZny8vLsjAYA\nGMBl930GLS0tmjhxYtQyePvtt7Vq1SpJUl1dnaqqquyMBQAYIGGuGWRkZISX09PTHUwCAKNPwpTB\nwBcolAEAxFfClAEXmQHAObZeQB6Ox+ORy+VSSUlJeCwQCAy5PJyGhgZbsgHAg27ZsmWDxmwtgwMH\nDqi1tVXjxo1TYWGhFi9eLElqamoaVAalpaWqra2VYRgqLy83tf+5c+fakluSqqurVVtba9v+7UZ+\n5yRzdsmZ/NXV1aretDWu39OM2lefNfVcVFdX6/Vlr8QhUWyea/jxoPwnTpwYcltby2C4X+obN24c\nNJafn6/q6mo74wAAhpEw1wwAAM6hDAAAlAEAgDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgy\nAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIA\nAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAEhKtXPnXq9X\nDQ0NcrvdKisrU15e3rDbtrW16fjx4wqFQlqwYIG+/OUv2xkNADCArWXQ3NysmpoaSVJdXZ2qqqqG\n3fbcuXN66qmnJEn19fWUAQDEka1lkJGREV5OT08fcdtgMKhQKCRJGjt2rJ2xAAD3sLUMDMMIL0cr\ng1mzZun555+Xy+XS2rVr7YwFALiHrWUQDAZNb3vq1Cn94he/kGEY2rVrl6nTRFlZWV8knuP7txv5\nnRPv7NXPPCNfR4d1+6uutmQ/WTk5qt22zZJ9OSWZj0PJfH5byyAQCAy57PF45HK5VFJSEh4bM2aM\nJMnlcmn8+PGm9u/z+SxK6sz+7UZ+58Q7u6+jQ2M2/Cyu39MM32svJPXPUUru41Ayn9/WMigtLVVt\nba0Mw1B5eXl4vKmpaVAZFBYWavfu3ZKk2bNn2xkLAHAPW8sgPz9/yJebGzduHDRWXFys4uJiO+MA\nAIbBTWcAAMoAAEAZAABEGQAARBkAAEQZAABEGQAAZPN9BgAwWjzX8GOnI3whlAEAWOD1Za84HWGQ\nWAqK00QAAMoAAEAZAABEGQAARBkAAPQFyqCnp8fKHAAAB0V9a2lLS8uQ40eOHBnycwkAAMknahk0\nNTVp0aJFunTpktrb2/Xoo4/qxIkTys7Ojkc+IC5+8tPN6u607uMNrfoM4QnZWXr5pS2W7AsYSdQy\nWLt2rTIzM3Xq1Cn98Ic/lCTNmTNH27dvtz0cEC/dnT65N1U4HWOQ7lf3OR0Bo0TUawaZmZmSpLS0\ntIjx9PR0exIBAOLO9AXkW7duKRQKSZKCwaBu3rxpWygAQHyZnpto5cqV2rZtm9LT09Xb26uVK1fa\nmQsAEEemyyA7O1vr1q2zMwsAwCExzVp67tw5Xbp0STNnztTMmTPtygQAiDPT1wwOHDggv9+v5cuX\nq7u7W42NjTbGAgDEk+ky8Pl8mj9/vtLT0zV//nx1dnbamQsAEEemy2C4t5LyriIASH6mrxn09PSo\no6NDOTk5unHjhgzDkCQ1NjZq1apVtg
UcTf7vpz/Rjc5uy/Zn1V2wk7In6A8vvTziNpt/+hP5LMwu\nWZc/K3uCtkTJD+fVvvqs0xFGNdNl4PV69dprr2nq1Km6cuWKCgsLtWPHDl24cIEysMiNzm796vuJ\n90mkv9kV/Ze8r7Nb33rOiEOa2L3/urUlBXtUb9rqdIRBRlNBmf7Ns2TJEs2bN2/Q+PHjxy0NBACI\nP9PXDIYqgrvjBw4csCwQACD+LPlwG7/fb8VuAAAOsaQMXC6XFbsBADiEj70EAFAGAADKAAAgygAA\noBhnLR3OsmXLhhz3er1qaGiQ2+1WWVmZ8vLyRtxPW1ubTp48KbfbrYqKikGfrgYAsEdMZfDRRx+p\nt7dXknTs2DGtX79e0ucfjXmv5uZm1dTUSJLq6upUVVU17L67u7t169atEbcBANjD9Gmi3bt3y+/3\nKyUlJfwvmoyMjPBytM9M9ng8CgQC2r17ty5evGg2FgDAAqZfGaSlpWnOnDnh9eLi4qiPuTuZnRS9\nDD755BNNmTJFq1evVn19vYqKikwVDgDgizP92/beu4zb29ujPiYYDJoOkpqaqrKyMknS1KlT5fP5\nTD8WAPDFmH5l0NraqldeeUWTJk2SYRi6cOGCfv3rX4/4mEAgMOSyx+ORy+VSSUlJeGzGjBlqbW1V\nUVGRfD6fZs2aFTVTVlZWxPq6mhq137hh9r8UlVVTKOdOmqS/vPGGJftyyr3PdbJJ5vxms99+7QWb\nk9yfZH7updGT33QZLF++XPPnzw+vm/nYy9LSUtXW1sowDJWXl4fHm5qaBpXBnDlztHPnTp09e1Y5\nOTlKTY0e7d5XD+03bmhr5TMm/jfx9exb25L+lQ75nWM2+5gNP7M5Sexuv/ZCUj/3UnIfO5L5/KbL\nYGARSAqf0hlJfn7+kH9db9y4ccjtn3rqKbNxAAAWuu8rtNeuXbMyBwDAQfddBkePHrUyBwDAQVFP\nE3V0dCgrKyvivJNhGOrp6bE1GAAgfqKWwfbt2/X0009r27Ztmjt3bvjeAU4TAcCDI2oZbN68WZI0\nffp0LV26NDze1dVlXyoAQFyZvmZQUVERsV5UVGR5GACAM0yXwb3TSQy8KWzgDWUAgORjyeQ/+/fv\nt2I3AACHWFIGoVDIit0AABxiSRm4XC4rdgMAcAhzRAMAKAMAAGUAAJBFZTBmzBgrdgMAcMh9l0F/\nf394+d4b0gAAycV0Gdz7MZdvv/225WEAAM4wXQb/+te/ItbT0tIsDwMAcMZ9nya6O3spACD5RZ21\ntKmpSb29vWptbdXBgwcl3ble0NfXZ3s4AEB8RC2D4uJiBYNBXblyRY888ogMw5Db7VZ2dnY88o06\nv9nVH32jBPX+68l9J3rw1X1ORxi1srNzVPvqs07HGCQ7O8fUdjlZOXqu4cc2p4ldTpa5/JKJMrj7\nS//JJ59UTo75HeP+/Or7UX8kcWe2oL71XGKeOjRbUu5NifeuuNFSUC+99KJl+6qurlZtba1l+zPj\nxS3JnV+K4ZpBbm6unTkAAA4yXQaXLl2S3++X3+/X3r17dfPmTTtzAQDiyHQZvP/+++rt7dW7776r\nxx9/XPX19XbmAgDEkekymDJlirKzszVu3DhNnDhRGRkZduYCAMSR6TLo6+tTMBhUauqdC5zcZwAA\nDw7TZZCSkqK//OUvmjNnjjo6OuT3++3MBQCII9PvY6yqqlIoFFJKSoouX76skpISO3MBAOIopuko\nUlLubF5QUKC5c+eGxy9fvmxtKgBAXFnyeQYffvihFbsBADjEkjLgYjIAJDdLysDlSu45aQBgtOMz\nkAEAlAEAgGsGAABZVAYFBQVW7AYA4JCYyqCjo0PNzc3q6OiIGJ83b56loQAA8WW6DE6fPq3jx48r\nPz9fzc3NOn36dNTHeL1evfHGG3rzzTfl9Xqjbh8IBPTb3/5WV69eNRsLAGAB02Vw8eJFVVRUqKCg\nQC
tXrtSFCxeiPqa5uVk1NTVau3atjh49GnX7xsZGrVixwmwkAIBFTJdBWlpaxHp6enrUxwyc5jra\n9j6fTxkZGRo7dqzZSAAAi5gug88++2zE9aEMfJdRtDI4fPiwFi1aZDYOAMBCpmctLSsr05///GdN\nmTJF7e3tWr58edTHBINB00GuXbumf/zjH7p27ZoKCwuVn59v+rEAgC/GdBnk5eVp/fr18vv9yszM\nNPWYQCAw5LLH45HL5YqYBnvDhg2SpJaWFk2cONHU/rOyskxtlwiSKetQyO8cM9mzcnLke+2FOKSJ\nTVZOjiPPfTL/vCVn8psug7vMFoEklZaWqra2VoZhqLy8PDze1NQ0qAwkqbOzU4cPHzb9ysDn85kP\n7rBkyjoU8jvHTPYtL75o2ferrq5WbW2tZftz4rlP5p+35Ez+mMvgroMHD2rp0qUjbpOfn6/q6upB\n4xs3bhxy++zsbG3atOl+IwEA7tN934Hc1dVlZQ4AgIOivjLYsmWL1qxZoz179oTfKmoYhi5cuKDK\nykrbAwIA7Be1DJ5++mlNnDhR48ePV1VVVXi8rq7O1mAAgPiJWgaTJ0+WJBUVFUWM3/08ZABA8jP9\nG33WrFkR6xUVFZaHAQA4477/vDczHQUAIDmYLoP+/v6I9ffee8/yMAAAZ5gug71790as+/1+y8MA\nAJxhugximWcIAJBcor6bqKurS319fbp161b4E876+/t5ZQAAD5CoZeDxeNTX1yev1yuPxyPDMJSa\nmqrvfe978cgHAIiDqGXwzW9+U5KUm5s7aGK5RPTsW9ucjoAkNCE7S92v7nM6xiATspN79k0kD9MT\n1SVDEUjS1spnnI4wCAWV+F5+aYtl+7J61k8gHu77PoMPP/zQyhwAAAeZfmVw5swZnTt3Tl1dXRo/\nfrza2tr02GOP2ZkNABAnpl8ZnD9/XmvWrNFDDz2ktWvXavr06XbmAgDEkekySEtLkySFQqGIdQBA\n8jNdBr29vZLu3HwWCoVkGIZtoQAA8WW6DMrKyiRJixcv1l//+ldNmDDBrkwAgDgzfQG5q6tLubm5\nmjx5sp55JvHevgkAuH+mXxm0tLRErLe1tVmdBQDgENNlcOvWrYj1U6dOWR4GAOAM06eJFi5cqHfe\neUcrVqywMw+SVFb2BL3/erfTMYaUlc31LSAa02WwdetWFRcXa+fOnTIMQxcuXFBlZaWd2ZBEtrz0\nsqX7Y0oHIL5Ml8G3v/1tzZs3L7y+Z88eWwIBAOLP9DWDgUUgSStXrrQ8DADAGfc9UR0A4MFBGQAA\nKAMAAGUAAFAM7yaC/SZlT9BvdiXee/Un8T594IFHGSSQP1j4Xn3epw8gFpwmAgBQBgAAygAAIMoA\nACDKAAAgm99N5PV61dDQILfbrbKyMuXl5Q27bUtLi86fP69QKKQFCxaooKDAzmgAgAFsLYPm5mbV\n1NRIkurq6lRVVTXsth0dHVq9erWkOzOiUgYAED+2nibKyMgIL6enp4+47ZIlS+yMAgAYga1lYBhG\neDlaGdy1f//+QdNlAwDsZetpomAwGNP2jY2NKioqUm5urqnts7Ky7ieWI5zImkzPz1CSOX8yZ5fI\n7zQn8ttaBoFAYMhlj8cjl8ulkpKS8NihQ4eUm5uradOmmd6/z+ezJmgcOJE1mZ6foSRz/mTOLpHf\naU7kt7UMSktLVVtbK8MwVF5eHh5vamqKKIP29nYdOnRIxcXFOn/+vPx+v9atW2dnNADAALaWQX5+\nvqqrqweNb9y4MWI9NzdXv/zlL+2MAgAYATedAQAoAwAAZQAAEGUAABBlAAAQZQAAEGUAABBlAAAQ\nZQAAEGUAABBlAAAQZQAAEGUAABBlAAAQZQAAEGUAABBlAAAQZQAAEGUAABBlAAAQZQAAEGUAABBl\nAAAQZQAAEGUAABBlAAAQZQAAEGUAABBlAAAQZQAAEGUAABBlAAAQ
ZQAAEGUAABBlAAAQZQAAkJRq\n5869Xq8aGhrkdrtVVlamvLw8S7YFAFjL1lcGzc3Nqqmp0dq1a3X06FHLtgUAWMvWMsjIyAgvp6en\nW7YtAMBatpaBYRjh5Wi/4GPZFgBgLVvLIBgM2rItAMBaLmPgn+QWq6urU1VVlSTpn//8p7773e9K\nkjwej1wul0pKSqJuO5yGhgabUgPAg23ZsmWDxmwtg6tXr+rgwYMyDEPl5eXKzc2VJP3xj3+Uy+XS\nhg0bom4LALCfrWUAAEgO3HQGAKAMAACjvAxCoVDSvYspGTMPlOz5gQeVrdNRJLJ9+/bp448/VmVl\npfLz80fcNlGmyoglc1NTk65du6a0tDR99atf1fTp00ccj4dY8ktSW1ubTp48KbfbrYqKCqWlpen5\n55/XzJkzZRiGbty4oR/96EdxSD5YS0uLzp8/r1AopAULFqigoGDYbRPl+IklcyIeP7HklxL7+Inl\neYzb8WOMYh6Px7hy5UrU7d56663w8t///nc7I0VlNvNA9fX1MY3byWz+rq4uw+PxDBr/7LPPwsv7\n9u2zNFssDh48GF6O9jwmyvETS+aBEuX4iSV/oh8/AyXK8TOqTxOZlaxTZbS1tel3v/udAoGAqfFE\n4vF4FAgEtHv3bl28eDE8PnbsWEnS9evXNXnyZKfiacmSJaa3TZTjJ5bMUuIdP7HkT/TjRzL/PMbr\n+Bm1p4liYSTpVBmFhYX6+c9/rps3b5oaTySffPKJpkyZotWrV6u+vl5FRUVKSfn8bxePx6PHH3/c\nwYR37N+/X/PmzRtxm0Q7fsxklhL3+DGTPxmOH7PPY7yOH14ZmJDMFzxTU1N19uxZ0+OJIjU1VWVl\nZZKkqVOnyufzRXw9EAhozJgxDiT7XGNjo4qKiqLeIJlIx4/ZzHcl2vFjNn8yHD+SuecxXscPZXAP\nj8ejlpaWiLGBL+MS8dTKUJk7OzvDy1evXo067qSh8s+YMUOtra2SJJ/PpwkTJoS/FgqFIv7Kc8Kh\nQ4eUm5uradOmRYwn8vETS+ZEPH5iyZ/ox89wz6OTx8+oPU104MABtba2aty4cSosLNTixYsl3bnK\nf++8SaWlpaqtrQ1PleGUWDIfOXJEt2/fVn9/v772ta9FHU+0/HPmzNHOnTt19uxZ5eTkKDX180P1\n008/1YwZM+KafaD29nYdOnRIxcXFOn/+vPx+v9atWycpcY+fWDMn2vETa/5EPn6k4Z9HJ48fpqMA\nAHCaCABAGQAARBkAAEQZAABEGQAARBkAADSK7zMAhnPw4EFdvnxZBQUFWrp0qdNxgLigDIB7LF26\nVB0dHfJ4PE5HAeKGMsAD6/jx49q7d6+effZZNTU1KSMjQyUlJcrJydH+/fuVkZGh/v5+5efna8GC\nBRGPHe5ezA8++EDXr19XWlqabt++rYqKCn3pS1/SmTNntHv3bi1btkyLFi0Krz/xxBNavHix2tvb\n1djYqPT0dPX19am4uFizZ8+WJB07dkz79+/XtGnTlJmZqb6+Pq1evTr8PT/66COdPn1aqampCoVC\nunr1qjZv3ixJunjxok6ePKmxY8fq9u3bmjdvnh5++GGbnlE80GybHBtIANu3bx80b/2f/vQno6+v\nL7x+5MgRo62tLWKb9vZ2o7GxMWKspaXFOHz4cHi9t7fX2LFjR3j9gw8+MC5fvhz+2q5du4bN9be/\n/S1i/eWXXw4v//vf/zb+85//GIZxZ/79nTt3Rmx75MgRwzAMo6enx9i6dWvE13bs2BHxfwPM4pUB\nHmgul0srVqyIGPN6vaqvrw+vh0IhpaWl6Stf+cqI+7p06ZJu376tt956S9KdVw89PT3hry9cuFC7\ndu3SmjVrdOTIES1atCj8tevXr+vQoUNyu91KSUlRV1dXxL4nTZoUXh47dqx6e3sl3ZnE7N6J2RYu\nXChJunbtmm7evBmRJxAIqLOz
0/SspMBdlAEeaMYQp3tmzpyp73znOxGTl/X390d97OzZs/W///0v\n4pTSwMe53W5lZmbq5s2bun79esQv5HfeeUc/+MEPwrNlvvnmm1FzStJDDz2kEydO6NFHHw2P/fe/\n/9XkyZNVUFCgvLw8VVZWhr8WCoWG3A8QDRPV4YG1Z88enThxInxu/utf/7oKCwvV09Ojd999V263\nW4Zh6NatW/rGN76hhx9+WF6vV0ePHlVPT4/a29s1bdo0FRUV6ZFHHpEknThxQp9++qlSUlIUDAbl\ncrlUVVUV/p49PT36/e9/r8rKyoiZJ9977z11d3fL5XKpr69Ply5d0pNPPqnHHntMjY2NOnz4sDZt\n2qTc3Fxt3bpVoVBI69evlySdO3dOZ86cUXp6uvr7+5Wdna0nnnhC0p1iOHbsWLjY/H6/Vq1apczM\nzLg8x3hwUAYAAG46AwBQBgAAUQYAAFEGAABRBgAAUQYAAFEGAABRBgAASf8PT4US1m73zacAAAAA\nSUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a46f2b0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of the product-description overlap ratio (ratio_st_in_pd) for each relevance value.\n",
    "# NOTE(review): majoritize is not defined in this part of the notebook (presumably from util3) - confirm what it does to df before plotting.\n",
    "sns.boxplot(x='relevance', y='ratio_st_in_pd', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1ab36b38>"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEPCAYAAACgFqixAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFYBJREFUeJzt3V9QVOfhxvFnWUDcoUaMINkwLVF0KNHWGrI1ViOjaGja\nBsZJHCltvGjIRS5705lO7zvT3PWi/2c6rW1Jo/IjaVFDJAUR6qCxRl1EGwlJ0S5IhDCLLn92z+/C\n8ZSVP3uW7Nmzi9/PTGbOeXn3+IQ5w8M5y3nXZRiGIQDAQy3D6QAAAOdRBgAAygAAQBkAAEQZAACU\nhDKIRCIKh8N2/zMAgM8h086DnzhxQh999JGqq6vl9XoXnBsIBNTa2iq3262KigoVFhbaGQ0AMIPL\n7ucMenp6tHLlyphl8Pbbb+uFF16QJDU1NammpsbOWACAGVLmPQOPx2NuZ2dnO5gEAB4+KVMGMy9Q\nKAMASK6UKQPeZAYA59j6BvJ8/H6/XC6XysrKzLFQKDTn9nxaW1ttyQYAS93u3btnjdlaBidPnlRf\nX5+WL1+u4uJi7dixQ5LU1dU1qwx8Pp8aGhpkGIYqKystHX/Lli225Jak2tpaNTQ02HZ8u5HfOemc\nXSK/0+zOf/78+TnHbS2D+X6o19fXzxrzer2qra21Mw4AYB4p854BAMA5lAEAgDIAAFAGAABRBgAA\nUQYAAFEGAABRBgAAUQYAAFEGAABRBgAAUQYAAFEGAABRBgAAUQYAAFEGAABRBgAAUQYAAFEGAABR\nBgAAUQYAAFEGAABRBgAAUQYAAFEGAABRBgAAUQYAAFEGAABRBgAAUQYAAFEGAABRBgAAUQYAAFEG\nAABRBgAAUQYAAFEGAABJmXYePBAIqLW1VW63WxUVFSosLJx3bn9/v86dO6dIJKKtW7fqi1/8op3R\nAAAz2FoG3d3dqqurkyQ1NTWppqZm3rlXrlzRiy++KElqbm6mDAAgiWwtA4/HY25nZ2cvODccDisS\niUiScnJy7IwFAHiArWVgGIa5HasMNm7cqNdff10ul0sHDhywMxYA4AG2lkE4HLY894MPPtCPfvQj\nGYaho0ePWrpNlJeX93niOX58u5HfOemcXSK/05zIb2sZhEKhObf9fr9cLpfKysrMsWXLlkmSXC6X\nVqxYYen4IyMjCUrqzPHtRn7npHN2ifxOcyK/rWXg8/nU0NAgwzBUWVlpjnd1dc0qg+LiYjU2NkqS\nNm3aZGcsAMADbC0Dr9er2traWeP19fWzxkpLS1VaWmpnHADAPHjoDABAGQAAKAMAgCgDAIAoAwCA\nKAMAgCgDAIAoAwCAKAMAgCgDAIAoAwCAKAMAgCgDAIAoAwCAKAMAgOIog87OTjtzAAAcZLkMhoeH\no/bPnTuX8DAAAGdYLoO7d+9qcnLS3L9x44YtgQAAyWf5Yy+Li4v1m9/8Rrt27ZIkDQ4O2hYKAJBc\nlq8MLl68aBYBAGBpsXxlsHfvXhUXF5v7wWDQjjwAAAdYvjKYWQSS5PP5Ep0FAOAQy1cGktTb26vr\n169r/fr12rBhg12ZAABJZvnKoK2tTWNjY9qzZ49GR0d16tQpO3MBAJLIchl8+umn8vl8ys7Ols/n\nm/XcAQAgfVkuA5fLFbWfmRnXHSYAQAqzXAZut9u8GhgeHtbExIRtoQAAyRXz1/tf/vKXys/PVzgc\n1u9+9zsVFRVpYGBAy5cvT0Y+AEASxCyDp59+WuXl5bPGWZsIAJaOmLeJ5iqCB8dPnjyZuEQAgKRL\nyOcZ8DQyAKS3hJTBg39pBABIL3zSGQCAMgAAUAYAAFEGAADFuWrpfHbv3j3neCAQUGtrq9xutyoq\nKlRYWLjgcfr7+3XhwgW53W5VVVUpKysrEfEAADHEVQYffvih+TnIZ8+e1cGDByVJubm5c87v7u5W\nXV2dJKmpqUk1NTXzHvuzzz7TnTt3Fp
wDALCH5dtEjY2NCgaDysjIMP+LxePxmNvZ2dkLzvX7/QqF\nQmpsbNS1a9esxgIAJIDlK4OsrCxt3rzZ3C8tLY35GsMwzO1YZfDxxx9rzZo12rdvn5qbm1VSUmKp\ncAAAn5/ln7YPPmU8NDQU8zXhcNhykMzMTFVUVEiSHn/8cY2MjFh+LQDg87F8ZdDX16df//rXevTR\nR2UYhq5evaqf/OQnC74mFArNue33++VyuVRWVmaOrV+/Xn19fSopKdHIyIg2btwYM1NeXp7V+Iti\n9/HtRn7npHN2ifxOcyK/5TLYu3evnn76aXO/ra0t5mt8Pp8aGhpkGIYqKyvN8a6urlllsHnzZh05\nckSXL1/W6tWrLX14jt1XD+l+dUJ+56Rzdon8TnMiv+UymFkEksxbOgvxer2qra2dNV5fXz/n/Bdf\nfNFqHABAAi36HdrBwcFE5gAAOGjRZXDmzJlE5gAAOCjmbaLh4WHl5eVF3cMyDEPj4+O2BgMAJE/M\nMnjzzTf10ksv6U9/+pO2bNliPjvAbSIAWDpilsFrr70mSVq3bp127txpjo+OjtqXCgCQVJbfM6iq\nqoraLykpSXgYAIAzLJfBg8tJzHwobOYDZQCA9JOQxX9aWloScRgAgEMSUgaRSCQRhwEAOCQhZeBy\nuRJxGACAQ1gjGgBAGQAAKAMAgBJUBsuWLUvEYQAADll0GUxPT5vbDz6QBgBIL5bL4MGPuXz77bcT\nHgYA4AzLZfDPf/4zaj8rKyvhYQAAzlj0baL7q5cCANJfzFVLu7q6NDk5qb6+PrW3t0u6937B1NSU\n7eEAAMkRswxKS0sVDod148YNPfnkkzIMQ263W6tWrUpGPgBAEsQsg/s/9Pfs2aPVq1fbHggAkHyW\n3zMoKCiwMwcAwEGWy+D69esKBoMKBoM6fvy4xsbG7MwFAEgiy2Xwj3/8Q5OTk3r33Xf1zDPPqLm5\n2c5cAIAkslwGa9as0apVq7R8+XKtXLlSHo/HzlwAgCSyXAZTU1MKh8PKzLz3njPPGQDA0mG5DDIy\nMvTHP/5Rmzdv1vDwsILBoJ25AABJFPNPS++rqalRJBJRRkaGBgYGVFZWZmcuAEASxbUcRUbGvelF\nRUXasmWLOT4wMJDYVACApErI5xm8//77iTgMAMAhCSkD3kwGgPSWkDJwuVyJOAwAwCF8BjIAgDIA\nAPCeAQBACSqDoqKiRBwGAOCQuMpgeHhY3d3dGh4ejhovLy9PaCgAQHJZLoOLFy/q3Llz8nq96u7u\n1sWLF2O+JhAI6M9//rPeeOMNBQKBmPNDoZB++tOf6ubNm1ZjAQASwHIZXLt2TVVVVSoqKtLzzz+v\nq1evxnxNd3e36urqdODAAZ05cybm/La2Nj333HNWIwEAEsRyGWRlZUXtZ2dnx3zNzGWuY80fGRmR\nx+NRTk6O1UgAgASxXAZ3795dcH8uM//KKFYZnD59Wtu3b7caBwCQQJZXLa2oqNAf/vAHrVmzRkND\nQ9q7d2/M14TDYctBBgcH9be//U2Dg4MqLi6W1+u1/FoAwOdjuQwKCwt18OBBBYNB5ebmWnpNKBSa\nc9vv98vlckUtg/3KK69Iknp6erRy5UpLx8/Ly7M0b7HsPr7dyO+cdM4ukd9pTuS3XAb3WS0CSfL5\nfGpoaJBhGKqsrDTHu7q6ZpWBJN2+fVunT5+2fGUwMjJiPfgi2H18u5HfOemcXSK/05zIH3cZ3Nfe\n3q6dO3cuOMfr9aq2tnbWeH19/ZzzV61apVdffXWxkQAAi7ToJ5BHR0cTmQMA4KCYVwa/+MUvtH//\nfh07dsz8U1HDMHT16lVVV1fbHhAAYL+YZfDSSy9p5cqVWrFihWpqaszxpqYmW4MBAJInZhnk5+dL\nkkpKSqLG738eMgAg/Vn+ib5x48ao/aqqqoSHAQA4Y9G/3ltZjgIAkB4sl8H09HTU/nvvvZfwMAAA\nZ1
gug+PHj0ftB4PBhIcBADjDchnEs84QACC9xPxrotHRUU1NTenOnTvmJ5xNT09zZQAAS0jMMvD7\n/ZqamlIgEJDf75dhGMrMzNR3vvOdZOQDACRBzDL4xje+IUkqKCiYtbAcAGBpsPyeAUUAAEvXop8z\neP/99xOZAwDgIMtLWF+6dElXrlzR6OioVqxYof7+fj311FN2ZgMAJInlK4Pe3l7t379fjz32mA4c\nOKB169bZmQsAkESWyyArK0uSFIlEovYBAOnPchlMTk5KuvfwWSQSkWEYtoUCACSX5TKoqKiQJO3Y\nsUN/+ctf9Mgjj9iVCQCQZJbfQB4dHVVBQYHy8/P1ve99z85MAIAks3xl0NPTE7Xf39+f6CwAAIdY\nLoM7d+5E7X/wwQcJDwMAcIblMti2bZveeecdO7MAABxiuQwOHTqksbExHTlyRIcPH9alS5fszAUA\nSCLLbyB/85vfVHl5ubl/7NgxWwIBAJLP8pXBzCKQpOeffz7hYQAAzlj0QnUAgKWDMgAAUAYAAMoA\nACDKAAAgygAAIMoAACDKAAAgygAAIMoAACDKAACgOBaqW4xAIKDW1la53W5VVFSosLBw3rk9PT3q\n7e1VJBLR1q1bVVRUZGc0AMAMtpZBd3e36urqJElNTU2qqamZd+7w8LD27dsn6d6KqJQBACSPrbeJ\nPB6PuZ2dnb3g3GeffdbOKACABdhaBoZhmNuxyuC+lpaWWctlAwDsZettonA4HNf8trY2lZSUqKCg\nwNL8vLy8xcSyzO7j2438zknn7BL5neZEflvLIBQKzbnt9/vlcrlUVlZmjnV0dKigoEBr1661fPyR\nkZHEBHXo+HYjv3PSObtEfqc5kd/WMvD5fGpoaJBhGKqsrDTHu7q6ospgaGhIHR0dKi0tVW9vr4LB\noF5++WU7owEAZrC1DLxer2pra2eN19fXR+0XFBToxz/+sZ1RAAAL4KEzAABlAACgDAAAogwAAKIM\nAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwA\nAKIMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAA\nogwAAKIMAACiDAAAkjLtPHggEFBra6vcbrcqKipUWFiYkLkAgMSy9cqgu7tbdXV1OnDggM6cOZOw\nuQCAxLK1DDwej7mdnZ2dsLkAgMSytQwMwzC3Y/2Aj2cuACCxbC2DcDhsy1wAQGK5jJm/kidYU1OT\nampqJEl///vf9e1vf1uS5Pf75XK5VFZWFnPufFpbW21KDQBL2+7du2eN2VoGN2/eVHt7uwzDUGVl\npQoKCiRJv/3tb+VyufTKK6/EnAsAsJ+tZQAASA88dAYAoAwAAA95GUQikbT7K6Z0zDxTuucHlipb\nl6NIZSdOnNBHH32k6upqeb3eBeemylIZ8WTu6urS4OCgsrKy9OUvf1nr1q1bcDwZ4skvSf39/bpw\n4YLcbreqqqqUlZWl119/XRs2bJBhGPr000/1gx/8IAnJZ+vp6VFvb68ikYi2bt2qoqKieeemyvkT\nT+ZUPH/iyS+l9vkTz/cxaeeP8RDz+/3GjRs3Ys576623zO3/+7//szNSTFYzz9Tc3BzXuJ2s5h8d\nHTX8fv+s8bt375rbJ06cSGi2eLS3t5vbsb6PqXL+xJN5plQ5f+LJn+rnz0ypcv481LeJrErXpTL6\n+/v1s5/9TKFQyNJ4KvH7/QqFQmpsbNS1a9fM8ZycHEnSrVu3lJ+f71Q8Pfvss5bnpsr5E09mKfXO\nn3jyp/r5I1n/Pibr/HlobxPFw0jTpTKKi4v1wx/+UGNjY5bGU8nHH3+sNWvWaN++fWpublZJSYky\nMv73u4vf79czzzzjYMJ7WlpaVF5evuCcVDt/rGSWUvf8sZI/Hc4f
q9/HZJ0/XBlYkM5veGZmZury\n5cuWx1NFZmamKioqJEmPP/64RkZGor4eCoW0bNkyB5L9T1tbm0pKSmI+IJlK54/VzPel2vljNX86\nnD+Ste9jss4fyuABfr9fPT09UWMzL+NS8dbKXJlv375tbt+8eTPmuJPmyr9+/Xr19fVJkkZGRvTI\nI4+YX4tEIlG/5Tmho6NDBQUFWrt2bdR4Kp8/8WROxfMnnvypfv7M93108vx5aG8TnTx5Un19fVq+\nfLmKi4u1Y8cOSffe5X9w3SSfz6eGhgZzqQynxJO5s7NTExMTmp6e1le+8pWY46mWf/PmzTpy5Igu\nX76s1atXKzPzf6fqJ598ovXr1yc1+0xDQ0Pq6OhQaWmpent7FQwG9fLLL0tK3fMn3sypdv7Emz+V\nzx9p/u+jk+cPy1EAALhNBACgDAAAogwAAKIMAACiDAAAogwAAHqInzMA5tPe3q6BgQEVFRVp586d\nTscBkoIyAB6wc+dODQ8Py+/3Ox0FSBrKAEvWuXPndPz4cX3/+99XV1eXPB6PysrKtHr1arW0tMjj\n8Wh6elper1dbt26Neu18z2KeOnVKt27dUlZWliYmJlRVVaUvfOELunTpkhobG7V7925t377d3N+1\na5d27NihoaEhtbW1KTs7W1NTUyotLdWmTZskSWfPnlVLS4vWrl2r3NxcTU1Nad++fea/+eGHH+ri\nxYvKzMxUJBLRzZs39dprr0mSrl27pgsXLignJ0cTExMqLy/XE088YdN3FEuabYtjAyngzTffnLVu\n/e9//3tjamrK3O/s7DT6+/uj5gwNDRltbW1RYz09Pcbp06fN/cnJSePw4cPm/qlTp4yBgQHza0eP\nHp0311//+teo/V/96lfm9r/+9S/j3//+t2EY99bfP3LkSNTczs5OwzAMY3x83Dh06FDU1w4fPhz1\n/wZYxZUBljSXy6XnnnsuaiwQCKi5udncj0QiysrK0pe+9KUFj3X9+nVNTEzorbfeknTv6mF8fNz8\n+rZt23T06FHt379fnZ2d2r59u/m1W7duqaOjQ263WxkZGRodHY069qOPPmpu5+TkaHJyUtK9Rcwe\nXJht27ZtkqTBwUGNjY1F5QmFQrp9+7blVUmB+ygDLGnGHLd7NmzYoG9961tRi5dNT0/HfO2mTZv0\n3//+N+qW0szXud1u5ebmamxsTLdu3Yr6gfzOO+/ou9/9rrla5htvvBEzpyQ99thjOn/+vL72ta+Z\nY//5z3+Un5+voqIiFRYWqrq62vxaJBKZ8zhALCxUhyXr2LFjOn/+vHlv/qtf/aqKi4s1Pj6ud999\nV263W4Zh6M6dO/r617+uJ554QoFAQGfOnNH4+LiGhoa0du1alZSU6Mknn5QknT9/Xp988okyMjIU\nDoflcrlUU1Nj/pvj4+P6+c9/rurq6qiVJ9977z199tlncrlcmpqa0vXr17Vnzx499dRTamtr0+nT\np/Xqq6+qoKBAhw4dUiQS0cGDByVJV65c0aVLl5Sdna3p6WmtWrVKu3btknSvGM6ePWsWWzAY1Asv\nvKDc3NykfI+xdFAGAAAeOgMAUAYAAFEGAABRBgAAUQYAAFEGAABRBgAAUQYAAEn/D+P3YHmZhl8q\nAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a46f320>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of the brand overlap ratio (ratio_st_in_br) for each relevance value.\n",
    "# Author's note: not very useful\n",
    "# NOTE(review): majoritize is not defined in this part of the notebook (presumably from util3) - confirm what it does to df before plotting.\n",
    "sns.boxplot(x='relevance', y='ratio_st_in_br', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1b161dd8>"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEPCAYAAACgFqixAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFz9JREFUeJzt3X1QFPfhx/HPcYDKWBUEJIRpiKJDibbWGGqsRhoxobYN\njE0cKUnsTNV0zC9/tP90pu2/nXaaPzrT/pImaWb6QFLS+FCSFjVGEnyiDBpr1EO0kWBqLODJIQPm\neLjb3x/+vHLycHu4e3uH79dMZna/fG/55Ny5D7t7t+cyDMMQAOCOluR0AACA8ygDAABlAACgDAAA\nogwAAIpBGQSDQQUCAbt/DQDgNiTbufF9+/bp448/Vnl5uXJzcyec29HRofr6erndbpWUlCgnJ8fO\naACAEVx2f86gpaVFc+bMiVgGb7/9th577DFJUm1trSoqKuyMBQAYIW6uGaSlpYWWU1NTHUwCAHee\nuCmDkQcolAEAxFbclAEXmQHAObZeQB6Px+ORy+VSUVFRaMzv94+5PJ76+npbsgHAVLd27dpRY7aW\nwYEDB9TW1qYZM2YoPz9fq1evliQ1NjaOKoPi4mLV1NTIMAyVlpaa2v6yZcssz1xZWRm2XlNTY/nv\niIXKysqEzS4ldv5Ezi6R32l25z9x4sSY47aWwXgv6lu3bh01lpubO+qF2Ak1NTWhHIm8QwFANOLm\nmgEAwDmOXDOIdxwRALjTcGQAAKAMAACUAQBAlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQB\nAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAlAEA\nQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAEBSsp0b7+joUH19vdxut0pK\nSpSTkzPu3Pb2dh0/flzBYFArVqzQ5z//eTujAQBGsLUMmpubVVVVJUmqra1VRUXFuHPPnj2rxx9/\nXJJUV1dHGQBADNlaBmlpaaHl1NTUCecGAgEFg0FJ0vTp0+2MBQC4ha1lYBhGaDlSGSxevFjPP/+8\nXC6XNm3aZGcsAMAtbC2DQCBgeu6HH36oH/3oRzIMQ7t27TJ1mig9PT3qTM8884wuXrwY9ePGcs89\n9+jll1+2ZFt2mMzzE08SOX8iZ5fI7zQn8ttaBn6/f8xlj8cjl8uloqKi0Ni0adMkSS6XS7NmzTK1\nfZ/PF3WmX/ziF6bmVVZWqqamxpYMsRLP2cxI5PyJnF0iv9OcyG9rGRQXF6umpkaGYai0tDQ03tjY\nOKoM8vPztXv3bknSkiVL7IwFALiFrWWQm5urysrKUeNbt24dNVZYWKjCwkI74wAAxsGHzgAAlAEA\ngDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAOg2yqCz\ns9PKHAAAB026DJqamqzMAQBwUMRvOvN6vaPGDMNQf3+/LYEAALEXsQyqq6u1bNkyGYYRNs5pIgCY\nOiKWwYIFC7RmzZpR4z09PbYEAgDEXsRrBuvXrx9zvKCgwPIwAABnRCyD5OSxDx4WL15seRgAgDMi\nniYaqbW1VRcuXNDChQu1aNEiuzIBAGLM9FtLGxoa1Nvbq3Xr1qmnp0eHDh2yMxcAIIZMl8HVq1dV\nXFys1NRUFRcXj/mWUwBAYjJdBi6XK2x9vGsJAIDEY7oM3G536GjA6/VqYGDAtlAAgNiK+Of9b3/7\nW2VlZSkQCOjVV19VXl6eLl26pBkzZsQiHwAgBiKWwQMPPKDly5ePGj9+/LgtgQAAsRfxNNFYRXDr\n+IEDB6xLBACIOUu+z6Cvr8+KzQAAHGJJGdz6TiMAQGLhm84AAJQBAIAyAACIMgAAKMq7lo5n7dq1\nY453dHSovr5ebrdbJSUlysnJmXA77e
3tOnnypNxut8rKypSSkmJFPABABFGVwUcffaTBwUFJ0rFj\nx7R582ZJ0syZM8ec39zcrKqqKklSbW2tKioqxt32tWvXdP369QnnAADsYfo00e7du9XX16ekpKTQ\nf5GkpaWFllNTUyec6/F45Pf7tXv3bp0/f95sLACABUwfGaSkpGjp0qWh9cLCwoiPMQwjtBypDC5e\nvKh58+Zpw4YNqqurU0FBganCAQDcPtOvtrd+yrirqyviYwKBgOkgycnJKikpkSTdfffd8vl8ph8L\nALg9po8M2tra9PLLL2vu3LkyDEPnzp3TT3/60wkf4/f7x1z2eDxyuVwqKioKjS1cuFBtbW0qKCiQ\nz+cz9R3L6enpZuNPit3btxv5nZPI2SXyO82J/KbL4JFHHtEDDzwQWm9oaIj4mOLiYtXU1MgwDJWW\nlobGGxsbR5XB0qVLtXPnTp05c0aZmZmmvjzH7qOHRD86Ib9zEjm7RH6nOZHfdBmMLAJJoVM6E8nN\nzVVlZeWo8a1bt445//HHHzcbBwBgoUlfoe3s7LQyBwDAQZMug6amJitzAAAcFPE0kdfrVXp6etg5\nLMMw1N/fb2swAEDsRCyDN998U0888YRee+01LVu2LPTZAU4TAcDUEbEMtm/fLklasGCB1qxZExrv\n6emxLxUAIKZMXzMoKysLWy8oKLA8DADAGabL4NbbSYz8UNjID5QBABKPJTf/2b9/vxWbAQA4xJIy\nCAaDVmwGAOAQS8rA5XJZsRkAgEO4RzQAgDIAAFAGAABZVAbTpk2zYjMAAIdMugyGh4dDy7d+IA0A\nkFhMl8GtX3P59ttvWx4GAOAM02Xwj3/8I2w9JSXF8jAAAGdM+jTRzbuXAgASX8S7ljY2NmpwcFBt\nbW06ePCgpBvXC4aGhmwPBwCIjYhlUFhYqEAgoE8//VT33XefDMOQ2+1WRkZGLPIhQWx/9vvydV+z\ndJtjfX/2ZKRnzNaLL7w04ZzvP7td17qt+xJyq7LPzkjXSy+8GHHe95/9H13rvmrJ75SszD9XL73w\nvxHnPfvsc+ru9lryOyXr8mdkZOqFF34Tcd5z25+T1xd/+TPTM/WbFyPnl0yUwc0X/XXr1ikzM/P2\nkmHK8nVf09e+G5+nDt//Q+SSutbtk3tb/L0r7tor+8zN676qaVt+YHOa6F179Vem5nV3e1W5rdrm\nNNGreeUpU/O8Pq/+sPZlm9NE77v1z5iea/qaQXZ29qTCAADin+kyuHDhgvr6+tTX16e9e/eqt7fX\nzlwAgBgyXQbvv/++BgcH9e677+rBBx9UXV2dnbkAADFkugzmzZunjIwMzZgxQ3PmzFFaWpqduQAA\nMWS6DIaGhhQIBJScfOOaM58zAICpw3QZJCUl6U9/+pOWLl0qr9ervr4+O3MBAGIo4ltLb6qoqFAw\nGFRSUpIuXbqkoqIiO3MBAGIoqttRJCXdmJ6Xl6dly5aFxi9dumRtKgBATFnyfQYffPCBFZsBADjE\nkjLgYjIAJDZLysDlclmxGQCAQ/gOZAAAZQAA4JoBAEAWlUFeXp4VmwEAOCSqMvB6vWpubpbXG/4l\nDsuXL7c0FAAgtkyXwalTp3T8+HHl5uaqublZp06diviYjo4Ovf7663rjjTfU0dERcb7f79fPf/5z\nXb582WwsAIAFTJfB+fPnVVZWpry8PK1fv17nzp2L+Jjm5mZVVVVp06ZNampqiji/oaFBjz76qNlI\nAACLmC6DlJSUsPXU1NSIjxl5m+tI830+n9LS0jR9+nSzkQAAFjFdBp999tmE62MZ+S6jSGVw5MgR\nrVq1ymwcAICFTN+1tKSkRH/84x81b948dXV16ZFHHon4mEAgYDpIZ2en/va3v6mzs1P5+fnKzc01\n/VgAwO0xXQY5OTnavHmz+vr6NHPmTFOP8fv9Yy57PB65XK6w22Bv2bJFktTS0qI5c+aY2n56erqp\neZ
Nl9/Zv9WRVpbxXfZZtr7Ky0pLtZM5N12uv11iyLafE+t/SSomcXSK/08zmN10GN5ktAkkqLi5W\nTU2NDMNQaWlpaLyxsXFUGUhSd3e3jhw5YvrIwOez7oXTie3fynvVp598O+p/Etv9bJcv5s+F1RI5\nfyJnl8jvNLP5J/3Kc/DgQa1Zs2bCObm5uWP+dbp169Yx52dkZGjbtm2TjQQAmKRJfwK5p6fHyhwA\nAAdFPDJ48cUXtXHjRu3Zsyf0VlHDMHTu3DmVl5fbHhAAYL+IZfDEE09ozpw5mjVrlioqKkLjtbW1\ntgYDAMROxDLIysqSJBUUFISN3/w+ZABA4jP9ir548eKw9bKyMsvDAACcMek/783cjgIAkBhMl8Hw\n8HDY+nvvvWd5GACAM0yXwd69e8PW+/r6LA8DAHCG6TKI5j5DAIDEEvHdRD09PRoaGtL169dD33A2\nPDzMkQEATCERy8Dj8WhoaEgdHR3yeDwyDEPJycn61re+FYt8AIAYiFgGX/3qVyVJ2dnZo24sF2+e\n2/6svL5uy7Zn2V0/0zP0mxdfsGRbAGAH0zeqi/cikCSvr1vV5U86HWOUp956zekIADChSX/O4IMP\nPrAyBwDAQaaPDE6fPq2zZ8+qp6dHs2bNUnt7u+6//347swEAYsT0kUFra6s2btyou+66S5s2bdKC\nBQvszAUAiCHTZZCSkiJJCgaDYesAgMRnugwGBwcl3fjwWTAYlGEYtoUCAMSW6TIoKSmRJK1evVp/\n/vOfNXv2bLsyAQBizPQF5J6eHmVnZysrK0tPPhl/b98EAEye6SODlpaWsPX29narswAAHGK6DK5f\nvx62/uGHH1oeBgDgDNNlsHLlSr3zzjt2ZgEAOMR0GVRXV6u3t1c7d+7Ujh07dPr0aTtzAQBiyPQF\n5K9//etavnx5aH3Pnj22BAIAxJ7pI4ORRSBJ69evtzwMAMAZk75RHQBg6qAMAACUAQCAMgAAKIp3\nEwFTXeCVfU5HuC0Dr/7K6Qi3peaVp5yOcEejDID/595W5nSEUaIpqGlbfmBjksmJpqAqt1XbmGRy\n7qSC4jQRAIAyAABQBgAAUQYAAFEGAADZ/G6ijo4O1dfXy+12q6SkRDk5OePObWlpUWtrq4LBoFas\nWKG8vDw7owEARrC1DJqbm1VVVSVJqq2tVUVFxbhzvV6vNmzYIOnGHVEpAwCIHVtPE6WlpYWWU1NT\nJ5z70EMP2RkFADABW8vAMIzQcqQyuGn//v2jbpcNALCXraeJAoFAVPMbGhpUUFCg7OxsU/PT09Mn\nE8sRiZR1LGbyv/8HVwySTE4iP/+JnF0iv9PM5re1DPx+/5jLHo9HLpdLRUVFobHDhw8rOztb8+fP\nN719n89nTdAYSKSsYzGT/2vfNSLOccL7f3Al9POfyNkl8jvNbH5by6C4uFg1NTUyDEOlpaWh8cbG\nxrAy6Orq0uHDh1VYWKjW1lb19fXp6aeftjMaAGAEW8sgNzdXlZWVo8a3bt0atp6dna0f//jHdkYB\nAEyAD50BACgDAABlAAAQZQAAEN90Fnd+tmvY6QgA7kCUQZz5ybfj75+EggKmPk4TAQAoAwAAZQAA\nEGUAABBlAAAQZQAAEGUAABBlAAAQZQAAEGUAABBlAAAQZQAAEGUAABBlAAAQZQAAEGUAABBlAAAQ\nZQAAEGUAABBlAAAQZQAAEGUAABBlAAAQZQAAEGUAABBlAAAQZQAAEGUAABBlAAAQZQAAkJRs58Y7\nOjpUX18vt9utkpIS5eTkWDIXAGAtW48MmpubVVVVpU2bNqmpqcmyuQAAa9laBmlpaaHl1NRUy+YC\nAKxlaxkYhhFajvQCH81cAIC1bC2DQCBgy1wAgLVcxsg/yS1WW1uriooKSdLf//53ffOb35QkeTwe\nuVwuFRUVRZw7nvr6eptSA8DUtnbt2lFjtpbB5cuXdfDgQRmGodLS
UmVnZ0uSfve738nlcmnLli0R\n5wIA7GdrGQAAEgMfOgMAUAYAgDu8DILBYMK9iykRM4+U6PmBqcrW21HEs3379unjjz9WeXm5cnNz\nJ5wbL7fKiCZzY2OjOjs7lZKSoi984QtasGDBhOOxEE1+SWpvb9fJkyfldrtVVlamlJQUPf/881q0\naJEMw9DVq1f1ve99LwbJR2tpaVFra6uCwaBWrFihvLy8cefGy/4TTeZ43H+iyS/F9/4TzfMYs/3H\nuIN5PB7j008/jTjvrbfeCi3/9a9/tTNSRGYzj1RXVxfVuJ3M5u/p6TE8Hs+o8c8++yy0vG/fPkuz\nRePgwYOh5UjPY7zsP9FkHile9p9o8sf7/jNSvOw/d/RpIrMS9VYZ7e3t+uUvfym/329qPJ54PB75\n/X7t3r1b58+fD41Pnz5dknTlyhVlZWU5FU8PPfSQ6bnxsv9Ek1mKv/0nmvzxvv9I5p/HWO0/d+xp\nomgYCXqrjPz8fP3whz9Ub2+vqfF4cvHiRc2bN08bNmxQXV2dCgoKlJT0379dPB6PHnzwQQcT3rB/\n/34tX758wjnxtv+YySzF7/5jJn8i7D9mn8dY7T8cGZiQyBc8k5OTdebMGdPj8SI5OVklJSWSpLvv\nvls+ny/s536/X9OmTXMg2X81NDSooKAg4gck42n/MZv5pnjbf8zmT4T9RzL3PMZq/6EMbuHxeNTS\n0hI2NvIwLh5PrYyVubu7O7R8+fLliONOGiv/woUL1dbWJkny+XyaPXt26GfBYDDsrzwnHD58WNnZ\n2Zo/f37YeDzvP9Fkjsf9J5r88b7/jPc8Orn/3LGniQ4cOKC2tjbNmDFD+fn5Wr16taQbV/lvvW9S\ncXGxampqQrfKcEo0mY8ePaqBgQENDw/ri1/8YsTxeMu/dOlS7dy5U2fOnFFmZqaSk/+7q37yySda\nuHBhTLOP1NXVpcOHD6uwsFCtra3q6+vT008/LSl+959oM8fb/hNt/njef6Txn0cn9x9uRwEA4DQR\nAIAyAACIMgAAiDIAAIgyAACIMgAA6A7+nAEwnoMHD+rSpUvKy8vTmjVrnI4DxARlANxizZo18nq9\n8ng8TkcBYoYywJR1/Phx7d27V0899ZQaGxuVlpamoqIiZWZmav/+/UpLS9Pw8LByc3O1YsWKsMeO\n91nMQ4cO6cqVK0pJSdHAwIDKysr0uc99TqdPn9bu3bu1du1arVq1KrT+8MMPa/Xq1erq6lJDQ4NS\nU1M1NDSkwsJCLVmyRJJ07Ngx7d+/X/Pnz9fMmTM1NDSkDRs2hH7nRx99pFOnTik5OVnBYFCXL1/W\n9u3bJUnnz5/XyZMnNX36dA0MDGj58uW69957bXpGMaXZdnNsIA68+eabo+5b//vf/94YGhoKrR89\netRob28Pm9PV1WU0NDSEjbW0tBhHjhwJrQ8ODho7duwIrR86dMi4dOlS6Ge7du0aN9df/vKXsPWX\nXnoptPzPf/7T+Ne//mUYxo377+/cuTNs7tGjRw3DMIz+/n6juro67Gc7duwI+38DzOLIAFOay+XS\no48+GjbW0dGhurq60HowGFRKSoruueeeCbd14cIFDQwM6K233pJ04+ihv78/9POVK1dq165d2rhx\no44ePapVq1aFfnblyhUdPnxYbrdbSUlJ6unpCdv23LlzQ8vTp0/X4OCgpBs3Mbv1xmwrV66UJHV2\ndqq3tzcsj9/vV3d3t+m7kgI3UQaY0owxTvcsWrRI3/jGN8JuXjY8PBzxsUuWLNF//vOfsFNKIx/n\ndrs1c+ZM9fb26sqVK2EvyO+8846+853vhO6W+cYbb0TMKUl33XWXTpw4oS9/+cuhsX//+9/KyspS\nXl6ecnJyVF5eHvpZMBgccztAJNyoDlPWnj17dOLEidC5+S996UvKz89Xf3+/3n33XbndbhmGoevX\nr+srX/mK7r33XnV0dKipqUn9
/f3q6urS/PnzVVBQoPvuu0+SdOLECX3yySdKSkpSIBCQy+VSRUVF\n6Hf29/fr17/+tcrLy8PuPPnee+/p2rVrcrlcGhoa0oULF7Ru3Trdf//9amho0JEjR7Rt2zZlZ2er\nurpawWBQmzdvliSdPXtWp0+fVmpqqoaHh5WRkaGHH35Y0o1iOHbsWKjY+vr69Nhjj2nmzJkxeY4x\ndVAGAAA+dAYAoAwAAKIMAACiDAAAogwAAKIMAACiDAAAogwAAJL+D07P4qxtEWKdAAAAAElFTkSu\nQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1b161128>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of ratio_st_in_bl for each relevance value; verdict: not very useful.\n",
    "sns.boxplot(data=majoritize(df), y='ratio_st_in_bl', x='relevance')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Positioned Word Matching"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Largest len_search_term in df: match_pos returns 0 for any position at or beyond it.\n",
    "df.len_search_term.max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def match_pos(row, col, pos):\n",
    "    \"\"\"Flag whether the search token at index `pos` occurs in `row[col]`.\n",
    "\n",
    "    Returns 0 when `pos` is not below `row['len_search_term']` or not below\n",
    "    `row['len_' + col]`; otherwise returns 1 if `row['tokens_search_term'][pos]`\n",
    "    is `in row[col]`, else 0.\n",
    "\n",
    "    NOTE(review): if `row[col]` is a plain string, `in` is a substring test\n",
    "    rather than a whole-word test -- confirm that is intended.\n",
    "    \"\"\"\n",
    "    out_of_range = pos >= row['len_search_term'] or pos >= row['len_' + col]\n",
    "    if out_of_range:\n",
    "        return 0\n",
    "    token = row['tokens_search_term'][pos]\n",
    "    return int(token in row[col])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# One 0/1 column per search-token position 0-9: match_pos against product_title.\n",
    "for i in range(10):\n",
    "    flag_col = '{}th_word_in_pt'.format(i)\n",
    "    df[flag_col] = df.apply(match_pos, axis=1, args=('product_title', i))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# One 0/1 column per search-token position 0-9: match_pos against product_description.\n",
    "for i in range(10):\n",
    "    flag_col = '{}th_word_in_pd'.format(i)\n",
    "    df[flag_col] = df.apply(match_pos, axis=1, args=('product_description', i))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# One 0/1 column per search-token position 0-9: match_pos against bullet.\n",
    "for i in range(10):\n",
    "    flag_col = '{}th_word_in_bl'.format(i)\n",
    "    df[flag_col] = df.apply(match_pos, axis=1, args=('bullet', i))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Encode Brand Feature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Give every distinct brand its own integer code: 1000, 1010, 1020, ... in order of\n",
    "# first appearance. 500 is the code for 'nobrand' and for any value that is missing\n",
    "# from the encoder.\n",
    "NO_BRAND_CODE = 500\n",
    "brands = df['brand'].unique()  # Series.ravel() is deprecated in recent pandas\n",
    "brand_encoder = {brand: 1000 + 10 * rank for rank, brand in enumerate(brands)}\n",
    "brand_encoder['nobrand'] = NO_BRAND_CODE  # overrides the code assigned above, if any\n",
    "df['brand_encoded'] = df['brand'].map(lambda x: brand_encoder.get(x, NO_BRAND_CODE))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 1.0 when the product_uid occurs in df_material, else 0.0.\n",
    "# Builtin float replaces np.float, an alias that NumPy 1.24 removed.\n",
    "pid_with_attr_material = df_material['product_uid'].unique()\n",
    "material_encoder = dict.fromkeys(pid_with_attr_material, 1)  # kept in case later cells read it\n",
    "df['flag_attr_has_material'] = df['product_uid'].isin(pid_with_attr_material).astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 1.0 when the product_uid occurs in df_color, else 0.0.\n",
    "# Builtin float replaces np.float, an alias that NumPy 1.24 removed.\n",
    "pid_with_attr_color = df_color['product_uid'].unique()\n",
    "color_encoder = dict.fromkeys(pid_with_attr_color, 1)  # kept in case later cells read it\n",
    "df['flag_attr_has_color'] = df['product_uid'].isin(pid_with_attr_color).astype(float)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Encode Attributes Feature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 1.0 when the product_uid occurs anywhere in df_attr, else 0.0.\n",
    "# Builtin float replaces np.float, an alias that NumPy 1.24 removed.\n",
    "pids_with_attr = df_attr['product_uid'].unique()\n",
    "attr_encoder = dict.fromkeys(pids_with_attr, 1)  # kept in case later cells read it\n",
    "df['flag_has_attr'] = df['product_uid'].isin(pids_with_attr).astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1bad6ba8>"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEPCAYAAACgFqixAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEjtJREFUeJzt3ftTVHX8x/HXwoKKpCPqhgwWihmSmKIxtiPIKAna10uU\njpeyfuHXfuhHp/+hX6yZ+q3JpPFCSEZA4GjEencqBXFEoFRGkVAJGy57+f7gl/2C3I7KZ8/GPh8z\nzeyePXv2La0+2XN2zzoCgUBAAICIFmX3AAAA+xEDAAAxAAAQAwCAiAEAQMQAACDJaXLjHo9Hd+/e\nVUxMjJYsWaLU1NRR171z545qamoUHR2t3NxcJSYmmhwNADCII1SfMygvL9emTZtGvb2srExbtmyR\nJJWWlmrbtm2hGAsAIMOvDCSptbVVhw4d0qJFi8ZcLy4uLng5NjbW9FgAgEGMxyAlJUWffPKJurq6\nxlxv8AsUYgAAoWU8BpLkdDp15coV5eTkjLqOz+ezvL2ampqJGAsAIs769etHXG40Bp2dnUpISJAk\ntbW1BZfX19fL4XAoPT09uKynp2fEy6PJzMx8rtncbrfiFyx5rm1g8uluuSqPx2P3GPg/breb/x8T\n6NKlS6PeZjQGdXV16u3tldfr1bJly4LLPR7PsBhkZWWpuLhYgUBAeXl5JscCADzBaAw2b9484vKi\noqJhy5KSkrRr1y6T4wAARsGHzgAAxAAAQAwAACIGAAARAwCAiAEAQMQAACBiAAAQMQAAiBgAAEQM\nAAAiBgAAEQMAgIgBAEDEAAAgYgAAEDEAAIgYAABEDAAAIgYAABEDAICIAQBAxAAAIGIAABAxAACI\nGAAARAwAACIGAAARAwCAiAEAQMQAACBiAAAQMQAASHLaPYCduluu2j0CMMz/bNmizo4Ou8cIG263\n2+4RwkLCnDk6XlZmbPsRHYP4BUvsHgFhJhx+Qejs6NALbxbYPQbCTOfpCqPbZzcRAIAYAACIAQBA\nxAAAIGIAABAxAACIGAAARAwAACIGAAARAwCAiAEAQMQAACBiAACQ4bOWNjQ0qLGxUX6/X6tXr1Zy\ncvKo6x4/flw+n0+SlJqaqqVLl5ocDQAwiNEYdHR0qLCwUJJUXl4+ZgymTp2qvLw8k+MAAEZhNAY5\nOTmW1/V6vSopKZEkpaSkKDMz09RYAIAnhOTLbaqqqrRq1aox1yko+P8v86ioMPslDgCAoYzH4OTJ\nk1q0aJFcLpfl+zid4481a9as5xkLGBXPLYQrk89NozGora2Vy+XSwoULhyyvr6+Xw+FQenp6cFlr\na6tSUlIkSb29veNu+/79+xM6KzCA5xbClcnnprEYtLe3q7a2VmlpaWpsbFR3d7f27t0rSfJ4PMNi\ncPPmTZ0/f14Oh0MrVqwwNRYAYATGYuByubRv374RbysqKhq2LDs729QoAIBx8KEzAAAxAAAQAwCA\niAEAQMQAACBiAAAQMQAAiBgAAEQMAAAiBgAAEQMAgIgBAEDEAAAgYgAAEDEAAChE34EcjhJmz1Zn\ny1W7x0CYSZg92+4RJEn/nOZ7wBFaERuD4z/8YPcIYcPtdsvj8dg9BgZ54c0Cu0dAmDH9CwK7iQAA\nxAAAQAwAACIGAAARAwCAniIGXq9X7e3t8vv96u7uNjkTACDELMWgpaVFpaWlqqqqkiSVlZUZHQoA\nEFqWYnDhwgW99957io+PV1RUlOLj403PBQAIIUsxmDJliiTJ4XAYHQYAYA9LMejp6Rly3ev1GhkG\nAGAPS6ejWL58uQ4cOKCuri6VlJTojTfeMD0XACCELMVg8eLFSk5O1t27d5WcnKyYmBjTcwEAQsjy\nW0vj4uK0YMEC+Xw+k/MAAGxgKQZffPFF8KyW9+7d04kTJ4wOBQAILUsxSEpKktvtliTNnz9fnZ2d\nRocCAISWpRhERQ1dzemM2K9BAIBJyV
IMnnwraV9fn5FhAAD2sPQr/rJly3T06FEtXLhQN27cUEZG\nhum5AAAhZCkGixYtUnJystra2vT2229r2rRppucCAISQ5Z3/U6dO1cKFC03OAgCwieUYNDU1BY8V\nnD9/Xh9++KGxoQAAoWXpAHJJSYm6u7sVFRUV/A8AMHlYemUQExOj5cuXB6+npaUZGwgAEHqWfsV/\n8pvN2tvbjQwDALCHpVcGzc3N+vLLLzV79mwFAgFdu3ZNn376qenZAAAhYikGGzZsGHLa6pMnT5qa\nBwBgA0u7iZ78/oLc3FwTswAAbPJMbwu6ePHiRM8BALCRpd1Ely9f1tWrV/XgwQPNmDFDra2tWrly\npenZAAAhYumVQWNjo3bs2KF58+Zp586dSk1NNT0XACCELMVg4Gsu/X7/kOsAgMnB0m6igdNQ+Hw+\n+f1+BQIBSxtvaGhQY2Oj/H6/Vq9ereTk5FHXvXPnjmpqahQdHa3c3FwlJiZaegwAwPOzFIOBdw9l\nZ2fr4MGDSkpKsrTxjo4OFRYWSpLKy8vHjMG5c+e0Z88eSVJpaam2bdtm6TEAAM/PUgxcLpckae7c\nuXr//fctbzwnJ8fyunFxccHLsbGxlu8HAHh+lmJQW1ur7OzsZ36QqqoqrVq1asx1Bu96shKDWbNm\nPfM8GI6fZ/iYM9eljtMVdo+BMDNnrsvo31NLMThx4oTu3bunNWvWBF8lWHXy5EktWrRo3Pv5fL6n\n2u79+/efan2MjZ9n+Cg7Vmr3CGHD7XbL4/HYPUbYMPn31NK7iQoLC7V582Y1NDTo8OHDamhosLTx\n2tpauVyuYV+KU19fP2wbPT09I14GAJhn6ZXBwHce5+bmqqurS59//rnS09PHvE97e7tqa2uVlpam\nxsZGdXd3a+/evZIkj8cjh8MxZBtZWVkqLi5WIBBQXl7es/55AADPwFIMfv31Vy1YsEB1dXWKj4/X\nxx9/PO59XC6X9u3bN+JtRUVFw5YlJSVp165dVsYBAEwwSzGoqanRunXr9O677yo6Otr0TACAELMU\ng8LCwuCuIgDA5GPpADIhAIDJzVIM/vnnHx05ckRff/21vF6vSkt56xsATCaWYlBRUaGtW7dq5syZ\ncjqdls9NBAD4b7AUgylTpigmJkYOh0OSOIgMAJOMpRj09/cPuT5wKmsAwORgKQZz585VXV2dvF6v\nzpw5o5deesn0XACAELIUg5ycHCUkJGjKlCmaN2+eMjMzTc8FAAghS58zkKQlS5ZoyZIlJmcBANjE\n0iuDJ9XX10/0HAAAGz1TDJqamiZ6DgCAjcbcTXTq1KlhywKBgJqbm40NBAAIvTFjcOvWLeXn5w/7\nkNnt27eNDgUACK0xY5CcnKw5c+YMWz59+nRjAwEAQm/MYwZr164dcfnq1auNDAMAsMczHUBOTEyc\n6DkAADayHIM///xT58+fl9frVUtLi8mZAAAhZikGZ86c0e3bt3Xr1i05nU6dPXvW9FwAgBCyFIO2\ntja53W5FRT1ePS4uzuhQAIDQshQDp3Pom44GTmUNAJgcLMXg33//ldfrlSR5vV75fD6jQwEAQsvS\nieo2bNigAwcO6M6dO/L5fMrPzzc9FwAghCzFoKOjQx999JHhUQAAdrG0m6ihoWHI9dbWVhOzAABs\nYvmYwWC///67kWEAAPawFAO3263KykrTswAAbGIpBt988426urp05MgRHT58WJcvXzY9FwAghCwd\nQN64caNWrVoVvF5eXm5sIABA6Fl6ZTA4BJK0adMmI8MAAOzxTGctBQBMLsQAAEAMAADEAAAgYgAA\nEDEAAIgYAABEDAAAIgYAABEDAICIAQBAxAAAIGIAABAxAACIGAAARAwAACIGAAAZjoHf75fP5zP5\nEACACWDpO5CfRUVFhVpaWrR161YlJSWNue7x48eD0UhNTdXSpUtNjQUAGIGxGBQUFKihocHSulOn\nTl
VeXp6pUQAA4zAWg6fh9XpVUlIiSUpJSVFmZqbNEwFAZAmLGBQUFAQvV1RUWLrPrFmzTI0Tkfh5\nIlzx3AyNsIjBYE6ntZHu379veJLIws8T4YrnZmiE/K2l9fX1w44ltLa2Bi/39vaGeCIAgLFXBtXV\n1Wpubta0adOUkpKi7OxsSZLH45HD4VB6enpw3Zs3b+r8+fNyOBxasWKFqZEAAKMwFoPR3h1UVFQ0\nbNlAKAAA9uATyAAAYgAAIAYAABEDAICIAQBAxAAAIGIAABAxAACIGAAARAwAACIGAAARAwCAiAEA\nQMQAAKAw/KazSLJnzx61tLTYPYYkye122/r4CxYs0LfffmvrDEAkIwY24h8/AOGC3UQAAGIAACAG\nAAARAwCAiAEAQMQAACBiAAAQMQAAiBgAAEQMAAAiBgAAEQMAgIgBAEDEAAAgYgAAEDEAAIgYAABE\nDAAAIgYAABEDAICIAQBAxAAAIGIAABAxAACIGAAARAwAACIGAAARAwCAiAEAQMQAACDDMfD7/fL5\nfCYfAgAwAZymNlxRUaGWlhZt3bpVSUlJY657584d1dTUKDo6Wrm5uUpMTDQ1Fp7gdrslSR6Px+ZJ\nANjJWAwKCgrU0NBgad1z585pz549kqTS0lJt27bN1FgAgBGExTGDuLi44OXY2FgbJ4ksA68KnrwM\nIPKERQwCgUDwMjEAgNAztpvoaTzLQeZLly4ZmCSy7N+/f8h1fqYIN/v37+d5GSIhj0F9fb0cDofS\n09ODy3p6eka8PJr169cbmQ0AIpWxGFRXV6u5uVnTpk1TSkqKsrOzJT1+18qTMcjKylJxcbECgYDy\n8vJMjQQAGIUjMHiHPQAgIoXFAWQAgL2IAQCAGACwF6etCQ9h8dZSmPU0p/vg1CAIJU5bEz54ZRAB\nBk73sXPnTp05c2bC1gWeV0FBgdauXWtpXZ6bZhGDCPA0p/vg1CAIVzw3zSIGEeBpTvfBqUEQrnhu\nmkUMIsDTHJzjQB7CFc9Ns4hBBBjtdB/19fXDTjP+tKcGAUzguRl6vJsoAox2ug9ODQK7cdqa8MHp\nKAAA7CYCABADAICIAQBAxAAAIGIAABAxAACIGAAARAwQ5vr7+1VcXKyysjKVl5frr7/+ktfrVVlZ\nmT777DN1dHQYn6G7u1tfffXVsE/E2q2urk4PHjwY8bbOzk55PJ4QT4T/Mj6BjLB248YNZWRkaOnS\npUOWb9myRadOnQrJDPHx8VqzZk1IHutp9Pf3q7+/f8TbfD6f+vr6QjwR/suIAcLWlStXgr/d3rhx\nQ7NmzVJOTk7w9pE+PO/3+3X06FHFxMTI7/frhRde0FtvvRW8/cSJE3r48KEcDoecTqf++OMP7dmz\nRy+//PK485w9e1ZNTU2SpISEhGAgxntMj8ejtrY2TZkyRT6fT7Gxsdq0adO4j9fe3q6TJ08qNjZW\n/f39SktLU0ZGhiSpoaFBly5dUltbm+Li4hQdHa2NGzfK6XSqvb1dP//8s9rb2/Xw4UNJ0ptvvimX\nyyVJunDhgn766Sd98MEH8ng8iouLU3p6uhYvXjzuTJi8iAHC1tKlSxUV9XhP5uBz1IwlKipK27dv\nD14/dOhQ8HJjY6NmzpypdevWSZIuXbqknp4eSyGQpPnz5wfPiXP48GFLjylJzc3NeueddzR9+nT5\n/X61tbVZejyXy6UdO3YM2e5ADNLT03Xv3j299tprmjNnzrD75efnq76+fsQvjlm1apVaWlp07do1\n7d6929IsmPyIASaVvr4+VVZWyu/3y+Fw6MaNG8Hbrl+/rvz8/OD1jIwMNTc3W9724K9ljI6OtvSY\nkrR9+3adPn1aPT096unp0fLlyy093r1791RbW6vo6GhFRUUNOz4w1mnFxjvlmMPhGPKzAIgB/tOe\n/Efvxx9/1Pr16zVjxgxJ0qNHj4K3vfLKK7py5YoyMzMlPd4NNRHG
ekyv16vr168rNzc3uOzgwYNK\nSUkZd7uVlZXavXt38NXRd999N+R2p9Mpr9c74n2jo6NHvU0aPxaIPMQAYevcuXP67bffJD3+rT45\nOVkrV67UlStX1NTUpJaWFrW1tWn69OlavXq1XnzxRb366quqrKwM7mdvbW1VZWWl8vPzlZaWpurq\nan3//ffy+/2aPXt28B/asfT19QWPXSxevFh///23Ghsb9csvvygnJ0eLFy8e9TG9Xq+qqqqCrxZ6\ne3uVlZVl6c+flJSkY8eOyeFwBLd78eJFrVy5UpL0+uuv64cfflB8fLykxwe6B3aBJSQk6MGDByor\nKwvef8uWLXI6nSovL9e1a9d07Nix4HasxAmTG6ewRsTq6urS2bNnhxzsBSIVrwwQUR49eqTq6mr5\n/X719PSosLDQ7pGAsMArA0BSRUWFent7hywLBAKKiorShg0bNHXq1EnxmMBoiAEAgNNRAACIAQBA\nxAAAIGIAAJD0vxLWXnbzXnCmAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1b7c82e8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of relevance for each value of flag_has_attr.\n",
    "sns.boxplot(data=majoritize(df), y='relevance', x='flag_has_attr')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Distance Metrics"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Bag of Words (BOW)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import CountVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Bag-of-words vectorizer: English stop words removed, vocabulary capped at 1000 terms.\n",
    "cv = CountVectorizer(max_features=1000, stop_words='english')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "CountVectorizer(analyzer='word', binary=False, decode_error='strict',\n",
       "        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',\n",
       "        lowercase=True, max_df=1.0, max_features=1000, min_df=1,\n",
       "        ngram_range=(1, 1), preprocessor=None, stop_words='english',\n",
       "        strip_accents=None, token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n",
       "        tokenizer=None, vocabulary=None)"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit one shared vocabulary on the four text columns joined with single spaces.\n",
    "corpus = df['search_term']\n",
    "for field in ('product_title', 'product_description', 'bullet'):\n",
    "    corpus = corpus + ' ' + df[field]\n",
    "cv.fit(corpus)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Vectorize each text column separately with the shared vocabulary fitted above.\n",
    "cv_of_st, cv_of_pt, cv_of_pd, cv_of_bl = (\n",
    "    cv.transform(df[field])\n",
    "    for field in ('search_term', 'product_title', 'product_description', 'bullet')\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### BOW-based Cosine Similarity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.metrics.pairwise import cosine_similarity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import normalize\n",
    "\n",
    "\n",
    "def rowwise_cosine(left, right):\n",
    "    \"\"\"Cosine similarity of left[i] with right[i] for every row i, as a 1-D array.\n",
    "\n",
    "    L2-normalises each row and sums the element-wise products, which is the\n",
    "    same quantity as cosine_similarity(left[i], right[i])[0][0] but computed in\n",
    "    one vectorised pass instead of one sklearn call per row. All-zero rows\n",
    "    score 0.\n",
    "    \"\"\"\n",
    "    products = normalize(left).multiply(normalize(right))\n",
    "    return np.asarray(products.sum(axis=1)).ravel()\n",
    "\n",
    "\n",
    "cv_cos_sim_st_pt = rowwise_cosine(cv_of_st, cv_of_pt)\n",
    "cv_cos_sim_st_pd = rowwise_cosine(cv_of_st, cv_of_pd)\n",
    "cv_cos_sim_st_bl = rowwise_cosine(cv_of_st, cv_of_bl)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Attach the three similarity vectors to df as feature columns.\n",
    "for suffix, scores in [('pt', cv_cos_sim_st_pt), ('pd', cv_cos_sim_st_pd), ('bl', cv_cos_sim_st_bl)]:\n",
    "    df['cv_cos_sim_st_' + suffix] = scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x10bfccc88>"
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEPCAYAAACgFqixAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGktJREFUeJzt3W1QVPehx/HfsoBIaSMEkWzQIEFL8bGJQ9VRw1WspCbF\n8WZakavJJGo6ye2LdKbTmd7OdPriTmdqZzrTXJ08dGw63hRrfIxR0bi5KEoddAgxLkGN+FBCEQhE\nK8rjnvvCcQPysAc8Z88ufj+vzh7+++eXzRl+7jl7/usyDMMQAOCBFuV0AACA8ygDAABlAACgDAAA\nogwAAApBGfj9fvX09Nj9awAA9yHazslLSkp06dIlFRQUyOPxDDm2urpaNTU18vv9mjt3rtLS0uyM\nBgDoxdYyyM/PV3V1tamxzc3NWrlypSTpwIEDlAEAhFDYXDNYtGiR0xEA4IEVNmVw1+HDhzVnzhyn\nYwDAAyWsyqC0tFSZmZlKSUlxOgoAPFBsvWYwGJ/PJ5fLpezs7MC+srIypaSkKCMjw9QcXq/XrngA\nMKotWbKk3z5by+DIkSOqra3V2LFjlZ6eroULF0qSysvL+5RBY2OjysrKlJWVpZqaGt28eVNr164N\nOv8TTzxhZ3wAGHUqKysH3O+K1FVLvV4vZQAAw1RZWTngO4OwumYAAHAGZQAAoAwAAJQBAECUAQBA\nlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAECU\nAQBAlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAEAhKAO/36+enh67fw0A4D5E2zl5SUmJ\nLl26pIKCAnk8niHHNjQ0yOv1yu12Kzc3V6mpqXZGAwD0YmsZ5Ofnq7q62tTYiooKFRUVSZL27Nmj\nFStW2BltSFu2bJEkvfjii45lAIBQCptrBvHx8YHt2NhYB5NIXq9XXq/X0QwAEEphUwaGYQS2nSyD\nLVu2yO/3y+/3B94hAMBoFzZlEC4XmXu/I+DdAYAHha3XDAbj8/nkcrmUnZ0d2Nfe3j7g9lASExMt\nz+bE7xiOl19+WVeuXLFkrscee0xvvvmmJXOZRf6vkX94rMwuRXZ+O7K7jN7nZyx25MgR1dbWauzY\nsUpPT9fChQslSW+//bZcLpfWrVsXGFtfX6+jR4/KMAzl5eUpJSVlyLm9Xq+eeOIJyzNv2bJFH374\noSRp6dKlEXsRubCwUMXFxU7HGLFIzh/J2SXyO83u/JWVlVqyZEm//ba+M8jLyxtw//r16/vt83g8\nKiwstDOOKS+++GLg9FCkFgEADJcjp4nC3UCtCQCjGWUwAN4RAHjQhM2niQAAzqEMAACUAQCAMgAA\niDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAA6D7KoLu728ocAAAHmS6DxsbGPo/f\nf/99y8MAAJxhugz+/ve/93kcExNjeRgAgDNGfJrIxq9OBgCEWNBvOisvL1dnZ6dqa2t19OhRSXeu\nF3R1ddkeDgAQGkHLICsrSz09Pfriiy80bdo0GYYht9utpKSkUOQDAIRA0DK4+0d/6dKlSk5Otj0Q\nACD0TF8zSElJsTMHAMBBQd8Z3NXc3KyDBw8qPj5enZ2dWrZsGaeKAGCUMF0Ghw4dUlFRkaKiouT3\n+7V9+3atWrXKzmwAgBAxfZpo7Nixioq6MzwqKkpxcXG2hQIAhJbpMmhvb+/z+Pbt25KkU6dOWZsI\nABBypsvA7/dry5Ytqqys1DvvvKO4uDhVV1fr448/tjMfACAETF8zuHXrlhYsWCBJysnJsS0QACD0\nTJfB6tWrlZCQ0G//pEmTVFdXp7S0NEuDAQBCx3QZDFQEd/d7vd4By6ChoUFer1dut1u5ublKTU0d\ndP7Lly/r9OnT8vv9mjt3riZNmmQ2Gg
DgPpkug6EMtmhdRUWFioqKJEl79uzRihUrBp3js88+03PP\nPSdJ2r9/v21l8POf/1x1dXWWzJWWlqaNGzdaMhcAOMmSMnC5XAPuj4+PD2zHxsYOOUdPT4/8fr8k\n2fqxVbN/vAsLC1VcXGxbDgAIJ5aUwWB6v2MIVgbTp0/Xxo0b5XK5uJkNAELM1jLo6ekxPfaTTz7R\nL37xCxmGoZ07d5o6TZSYmHg/8Ryf327kd04kZ5fI7zQn8tt6zaD3jWq9t30+n1wul7KzswP7xowZ\nI+nOKadvfetbpn5va2vrSOKaZvf8diO/cyI5u0R+pzmR35IyGOxjpTk5OSouLpZhGMrLywvsLy8v\n71cG6enp2rVrlyRpxowZVsQCAJg04jK4du2aJkyYIEmaM2fOgGM8Ho8KCwv77V+/fn2/fVlZWcrK\nyhppHADAfTC9HEV1dXWfx+Xl5ZaHAQA4w3QZXLhwoc9jt9tteRgAgDNMl8G97t4TAACIfEGvGXzw\nwQe6ffu2zp07p66uLhmGIZfLxddgAsAoErQMnnnmGUnSvn379Oyzz9oeCAAQeqZPEz399NN25gAA\nOMh0GURH23qzMgDAQabL4MCBA6qrq9Ply5f117/+VZ9//rmduQAAIWS6DDo7O+XxeFRVVaXVq1er\nsrLSzlwAgBAyXQZut1tRUVGB5aXvriUEAIh8psugo6NDbW1ttn7XAADAGabLwOPxaPfu3Zo3b57O\nnj2rixcv2pkLABBCpj8iNH/+fM2fP1/SnUXlMjMzbQsFAAitES1HER0d3ed0UVVVlWWBAAChN+K1\niXq7cuWKFdMAABxiSRkM9k1nAIDIYEkZuFwuK6YBADjEkjIAAEQ2ygAAwDUDAMB9lEFjY2Nge/bs\n2ZaEAQA4w/RNZ6dPn1Ztba2ioqJkGIbq6ur02muvSZLS09PtygcACAHTZXDhwgUVFhbamQUA4BDT\np4l6enpUX1+v5uZmNTc3q6SkxM5cAIAQMv3OoL29XefPn5fb7ZYkfeMb37AtFAAgtEyXQUJCgnJz\ncwOPm5qa7MgDAHCA6TI4c+aM/H6/YmNjZRiGzp07p1/96ld2ZgMAhIjpMigqKtK0adMCj30+ny2B\nAAChZ/oCcu8ikKSpU6daHgYA4AzT7wzuVVJSomeffXbIMQ0NDfJ6vXK73crNzVVqauqQ4y9fvqyq\nqiq53W7l5+crJiZmpPEAAMMw4jLw+/1Bx1RUVKioqEiStGfPHq1YsWLQsdevX9etW7eGHAMAsEfQ\nMvjNb36jDRs2qLi4WJMmTZKkwAXkgoKCIZ8bHx8f2I6NjR1yrM/nU1xcnHbt2qXp06dzGgoAQiho\nGfz617+WJGVkZPT5V/uePXuCTt57AbtgZXDlyhVNmDBBK1eu1P79+5WZmamoKBZVBYBQMH2aaO7c\nuX0ejxs3Luhzenp6zAeJjg7cx/Doo4+qtbVVDz/88JDPSUxMND3/SNg9v93I75xIzi6R32lO5Ddd\nBvde/O19A9pg2tvbB9z2+XxyuVzKzs4O7JsyZYpqa2uVmZmp1tZWTZ8+Pej8ra2tJpKPnN3z2438\nzonk7BL5neZE/hFfQDYjJydHxcXFMgxDeXl5gf3l5eX9ymD27NnasWOHzp49q+TkZEVH2xoNANCL\n6b+4Bw4c0MyZM9Xd3a3y8nLl5OQoMzNzyOd4PJ4BVzpdv379gOOfe+45s3EAABYyXQadnZ3yeDx6\n//33tXr1am3fvj1oGQCAGa+++lO1tDRbNp9Vy+0nJSVr06bXg4776Ss/VXNr+OVPTkzW65uD55eG\nUQZut1tRUVGKi4uTJI0ZM2Zk6QDgHi0tzSrcsNXpGP0Uv7XG1Ljm1ma9s+RNm9MM3wvel02PNf3Z\nzY6ODrW1tQXKAAAwepguA4/Ho927d2vevHk6e/asLl68aGcuAEAImT5NNH/+fM2fP1+SlJWVxfUC\nAB
hFRnSLb3R0dJ/TRVVVVZYFAgCEniXrPVy5csWKaQAADrGkDHqvQQQAiDyW3ObrcrmsmAbACP3k\n1f/U9ZYvLZvPqs+5P5T0sN7Y9D+WzAV7seYDMApcb/lSY9a95nSMfq7/6Q9OR4BJrBENAOCaAQDA\notNEs2fPtmIawDE/efUVXW+xbtlg6865J+qNTZstmQsYyojK4OrVq5o4cWLgwnF6erqVmYCQu97S\nKveGfKdj9HP9rRKnI+ABYfo00e9//3tdvXpVBw8eVF1dnfbu3WtnLgBACJkug29/+9uaNGmSDMMI\nLEsBABgdTJeB3++XdGcpawDA6GK6DP71r3/p8OHDevzxxyVxoxkAjCamLyCvXLlS165d0+TJk/XJ\nJ5+op6fHzlwAgBAyXQbx8fGaPHmyJGnWrFmaNWuWbaEAAKE1rI+WNjc3q7a2VhkZGUpOTrYrEyLQ\nK6/+RK0t1y2d06rP6icmPaTNm96wZC5gtDJdBmfOnFF9fb2mT5+uiooKpaWlaebMmXZmQwRpbbmu\nf3shPO9E/793rC0pYDQyfQH5/Pnzys/PV1pamn7wgx/o3LlzduYCAISQ6TKIiYnp8zg2NtbyMAAA\nZ5gug9u3bw/5GAAQuUxfM8jNzdVf/vIXTZgwQY2Njfr+979vZ64H0n+++hN9aeFFWKsuwD6c9JD+\nhwuwwJBe8L7sdIT7YroMUlNT9fzzz+vmzZtKSEiwM9MD68uW6/qvfw+/7xv6751cgAWCeWfJm05H\n6Gc4BTXs7zNISEhQe3v7cJ8GAAhjpstg8+bNKi8vlyQ1NTXpo48+si0UACC0TJeBx+MJrFY6ceJE\ntbS0BH1OQ0OD3n33XW3btk0NDQ1Bx7e3t+u3v/2t6uvrzcYCAFjAdBlERfUdGh0d/Nx2RUWFioqK\ntGrVKp08eTLo+NLSUi1btsxsJACARUyXQXd3d5/HnZ2dQZ8THx8f2A52X0Jra6vi4+MVFxdnNhIA\nwCKmP7oyc+ZM7dy5UxkZGbp48aJmzJgR9DmG8fXyBMHK4Pjx41q+fLlqamrMRgIwihS/tcbpCA80\n02WQmZmptLQ01dfXa/ny5Ro7dmzQ5wxnmetr165p3759unbtmtLT0+XxeII+JzEx0fT8I2H3/JEk\n0l+LSM4fydkl8/kLN2y1OcnwFb+15oF5/Yf1ofa4uDhlZGT0219VVaXZs2f329/7I6i9t30+n1wu\nl7KzswP71q1bJ0mqrq7WuHHjTOVpbW01nX0k7J4/kkT6axHJ+SM5u0R+p5nNb8kdTleuXBmwDHJy\nclRcXCzDMJSXlxfYX15e3q8MJKmlpUXHjx83/c4AAGANS8qg97WB3jwez4BLIqxfv37A8UlJSdqw\nYcOIc/z0lVfV3Br8I69mWbWcQ3Jikl7fvMmSuYDBdPzpD05HQASzpAzC5fuQm1tbtLXgP5yO0c+a\nvf/rdASY0PNWidMR7suYda85HaEfCipyhN9COIBD3BvynY7QT6QXFCLHsNcmAgCMPpaUwWDXDAAA\nkcF0GQx1z8BAnyQCAEQO02Wwbdu2QX+Wnp5uRRYAgENMl0FTU5O2bdumY8eO9VunCAAQ2Ux/mujH\nP/6xHnnkETU0NGjfvn2KjY3V8uXL7cwGAAgR0+8MJkyYIOnO119OmjTJ1PcTAAAig+l3Brt371Zi\nYqKampr05JNP6qWXXrIzFwAghEyXwfnz57Vu3TqNHz/ezjwAAAeYPk30wgsvUAQAMEqZfmcQFxen\nW7duKT4+Xg0NDRo3bhzfSoY+/u+d8FijCsDwmS6Dd999VwsXLtSsWbOUkJCgQ4cOqaCgwM5siDD/\n9kJ43olOSQHBmT5NNHHiRM2aNUuSlJCQoK6uLttCAQBCy3QZ3LtMdUxMjOVhAADOMF0G3d3dgQXp\n/H6/bt++bVsoAEBomb5msHjxYhUXFys2Nlbt7e16+umn7cz1wPrv
nSz1ASD0TJfBuHHjtHr1ajuz\nQNJ//Xv4fd8QBQWMfny5DQCAMgAA8B3IAHDfkhOT9YL3Zadj9JOcmGx6LGUAAPfp9c2vWzZXYWGh\niouLLZvPLE4TAQAoAwAAZQAAEGUAABAXkAGEgaSkZBW/tcbpGP0kJZn/NE6kowwAOG7Tpsj/NE6k\ns7UMGhoa5PV65Xa7lZubq9TU1EHHVldXq6amRn6/X3PnzlVaWpqd0QAAvdhaBhUVFSoqKpIk7dmz\nRytWrBh0bHNzs1auXClJOnDgAGUAACFk6wXk+Pj4wHZsbOyQYxctWmRnFADAEGwtg7vffyAFL4O7\nDh8+rDlz5tgVCQAwAFvLoKenZ1jjS0tLlZmZqZSUFJsSAQAGYus1g/b29gG3fT6fXC6XsrOzA/vK\nysqUkpKijIwM0/MnJiZaEzQEIinrQMjvHDPZE5OT1fqnP4QgzfAkJic78tpH8v9vyZn8tpZBTk6O\niouLZRiG8vLyAvvLy8v7lEFjY6PKysqUlZWlmpoa3bx5U2vXrg06f2trq23ZrRZJWQdCfueYyb75\n9fD9aKYTr30k//+WnMlvaxl4PB4VFhb2279+/fo+j1NSUvTLX/7SzigAgCGMupvO1uz9X6cjIAI9\nlJSo62+VOB2jn4eSIvt0ByLHqCuDrQX/4XSEfiio8PfGps2WzcUdsIhELFQHAKAMAACUAQBAlAEA\nQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAECUAQBA\nlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAlAEAQJQBAECUAQBAUrSdkzc0NMjr9crtdis3N1epqamW\njAUAWMvWdwYVFRUqKirSqlWrdPLkScvGAgCsZWsZxMfHB7ZjY2MtGwsAsJatZWAYRmA72B/44YwF\nAFjL1jLo6emxZSwAwFouo/c/yS22Z88erVixQpL0wQcf6JlnnpEk+Xw+uVwuZWdnBx07GK/Xa1Nq\nABjdlixZ0m+frWVQX1+vo0ePyjAM5eXlKSUlRZL09ttvy+Vyad26dUHHAgDsZ2sZAAAiAzedAQAo\nAwDAA14Gfr8/4j7FFImZe4v0/MBoZetyFOGspKREly5dUkFBgTwez5Bjw2WpjOFkLi8v17Vr1xQT\nE6PvfOc7evzxx4fcHwrDyS9Jly9fVlVVldxut/Lz8xUTE6ONGzdq6tSpMgxDX375pV566aUQJO+v\nurpaNTU18vv9mjt3rtLS0gYdGy7Hz3Ayh+PxM5z8UngfP8N5HUN2/BgPMJ/PZ3zxxRdBx+3duzew\nvXv3bjsjBWU2c2/79+8f1n47mc3/1VdfGT6fr9/+27dvB7ZLSkoszTYcR48eDWwHex3D5fgZTube\nwuX4GU7+cD9+eguX4+eBPk1kVqQulXH58mX97ne/U3t7u6n94cTn86m9vV27du3S+fPnA/vj4uIk\nSU1NTRo/frxT8bRo0SLTY8Pl+BlOZin8jp/h5A/340cy/zqG6vh5YE8TDYcRoUtlpKen62c/+5lu\n3Lhhan84uXLliiZMmKCVK1dq//79yszMVFTU1/928fl8mjdvnoMJ7zh8+LDmzJkz5JhwO37MZJbC\n9/gxkz8Sjh+zr2Oojh/eGZgQyRc8o6OjdfbsWdP7w0V0dLRyc3MlSY8++qhaW1v7/Ly9vV1jxoxx\nINnXSktLlZmZGfQGyXA6fsxmvivcjh+z+SPh+JHMvY6hOn4og3v4fD5VV1f32df7bVw4nloZKHNL\nS0tgu76+Puh+Jw2Uf8qUKaqtrZUktba26qGHHgr8zO/39/lXnhPKysqUkpKijIyMPvvD+fgZTuZw\nPH6Gkz/cj5/BXkcnj58H9jTRkSNHVFtbq7Fjxyo9PV0LFy6UdOcq/73rJuXk5Ki4uDiwVIZThpP5\nxIkT6ujoUHd3t2bOnBl0f7jl
nz17tnbs2KGzZ88qOTlZ0dFfH6pXr17VlClTQpq9t8bGRpWVlSkr\nK0s1NTW6efOm1q5dKyl8j5/hZg6342e4+cP5+JEGfx2dPH5YjgIAwGkiAABlAAAQZQAAEGUAABBl\nAAAQZQAA0AN8nwEwmKNHj6qurk5paWl66qmnnI4DhARlANzjqaeeUnNzs3w+n9NRgJChDDBqnT59\nWgcPHtSaNWtUXl6u+Ph4ZWdnKzk5WYcPH1Z8fLy6u7vl8Xg0d+7cPs8d7F7MY8eOqampSTExMero\n6FB+fr6++c1v6tNPP9WuXbu0ZMkSLViwIPB48eLFWrhwoRobG1VaWqrY2Fh1dXUpKytLM2bMkCSd\nOnVKhw8fVkZGhhISEtTV1aWVK1cGfufnn3+uM2fOKDo6Wn6/X/X19XrllVckSefPn1dVVZXi4uLU\n0dGhOXPmaPLkyTa9ohjVbFscGwgD27dv77du/Z///Gejq6sr8PjEiRPG5cuX+4xpbGw0SktL++yr\nrq42jh8/Hnjc2dlpvPfee4HHx44dM+rq6gI/27lz56C5/va3v/V5/MYbbwS2P/74Y+PChQuGYdxZ\nf3/Hjh19xp44ccIwDMNoa2sztm7d2udn7733Xp//NsAs3hlgVHO5XFq2bFmffQ0NDdq/f3/gsd/v\nV0xMjB577LEh57p48aI6Ojq0d+9eSXfePbS1tQV+Pn/+fO3cuVM/+tGPdOLECS1YsCDws6amJpWV\nlcntdisqKkpfffVVn7kffvjhwHZcXJw6Ozsl3VnE7N6F2ebPny9Junbtmm7cuNEnT3t7u1paWkyv\nSgrcRRlgVDMGON0zdepULV++vM/iZd3d3UGfO2PGDP3zn//sc0qp9/PcbrcSEhJ048YNNTU19fmD\nfOjQIa1evTqwWua2bduC5pSkRx55RJWVlfrud78b2PePf/xD48ePV1pamlJTU1VQUBD4md/vH3Ae\nIBgWqsOodeDAAVVWVgbOzc+aNUvp6elqa2vThx9+KLfbLcMwdOvWLX3ve9/T5MmT1dDQoJMnT6qt\nrU2NjY3KyMhQZmampk2bJkmqrKzU1atXFRUVpZ6eHrlcLq1YsSLwO9va2vTHP/5RBQUFfVae/Oij\nj3T9+nW5XC51dXXp4sWLWrp0qZ588kmVlpbq+PHj2rBhg1JSUrR161b5/X49//zzkqTPPvtMn376\nqWJjY9Xd3a2kpCQtXrxY0p1iOHXqVKDYbt68qR/+8IdKSEgIyWuM0YMyAABw0xkAgDIAAIgyAACI\nMgAAiDIAAIgyAACIMgAAiDIAAEj6f7vS0rpxJUdTAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10be4aba8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of cv_cos_sim_st_pt for each relevance value.\n",
    "sns.boxplot(data=majoritize(df), y='cv_cos_sim_st_pt', x='relevance')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x10ce62748>"
      ]
     },
     "execution_count": 133,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEPCAYAAACgFqixAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X1wVNXdB/DvZvPGGoXEEMIaY4gBk4iCwqRIeYkSK+JL\nMkhtQio4QmhHH/+oM05n+rR2WueZaYszzrSVsbZjdaguxQBhgBjR+CCQnTRgjJLdIEhMYho3L2QB\nScjr3ucPntzuJjfZu8m9e/fsfj9/nb25uft1vewv59x7zzFJkiSBiIgiWpTRAYiIyHgsBkRExGJA\nREQsBkREBBYDIiICiwEREQGI1vPgLpcL1dXVMJvNyM/PR2pq6qT7dnd3o7q6GhaLBYsXL0ZmZqae\n0YiIyIuuPYO6ujqUlpaiuLgYtbW1U+5bX1+P4uJiPP744/j222/1jEVEROPoWgwsFovcjo2NnXLf\nuLg4XLt2DR6PB5999hmGh4f1jEZERF50HSbyfrjZXzFYtWoVqqqqMDQ0hDvuuAN9fX2YM2eOnvGI\niOj/6VoMRkdHVe8bHR2NRx99FABQUVHh06sgIiJ96VoMBgYGFNsOhwMmkwm5ubkTfufSpUvo6+vz\n25Oorq7WLigRUQRZt27dhG26FoO8vDzYbDZIkoSCggJ5u91un1AMGhsb0dTUBJPJhB/+8Ieqjn/v\nvfdqnpmIxOR0OvHyyy8DAH71q18p/rFJ12/WUWISddbS6upqFgMijTmdTgAQ8ov0t7/9LZqamgAA\nOTk5eOmllwxOFLjKykoAwIYNG3R7j/r6esWeAR86IyJZeXk5ysvLjY4Rsfbt24d9+/YZ8t4sBkQE\n4HqvoKmpCU1NTXIPQSSbNm1SbIuisrIS/f396O/vl3sIwcRiEIacTqeQ/5jJWN49AhF7B7m5ucjJ\nyUFOTo6Qw1zePQIjege6XkAmY4z9QxZxzJSM09fXp9gWiYg9glDBnkGYEb2rT8YZHBxUbIskNzdX\nyF4BAHz/+99XbAcLi0GYEb2rT8b57rvvFNsUHO3t7YrtYGExINKQyNdrkpOTFdsiEfnzNxqLQZgR\n/Y4K0Yl8a+bWrVsV2yJ5++238fbbbxsdY1qWL1+u2A4WFoMwI/odFSIT/XpNbm4uEhMTkZiYKOS5\n43Q60dbWhra2NiE//8OHDyu2g4XFIAxt2rSJvQIDhMP1mqtXr+Lq1atGx5gW7x6BiL0Dt9ut2A4W\nFoMwJPIdFSLr7+9XbIuisrISw8PDGB4eNuShp5nq6elRbIvCZDIptoOFxYBII97TfIk45dfevXsV\n26KYO3euYlsU3nOtGTHvGosBkUZuuOEGxbYovFcXFHGlwUWLFim2RcFbS4nChNF3g8zU7NmzFdui\nOH78uGJbFN3d3YrtYGExCEO819oYp0+fVmyL4qabblJsi2JoaEixLQpeMyDNiXyvOyBuMRP9ArL3\nUrMiLjsbHR2t2BZFVFSUYjto7x/0dyRdiX6vOyBuMZtsmVdRiP7A4vz58xXbooiJiVFsB4uuxcDl\ncuGdd97Bnj174HK5pty3paUF5eXl2Lt3L9ra2vSMFdZEv9dd5GJ26dIlxTYFh+h3Ez3xxBOK7WDR\ntS9VV1eH0tJSAEBFRQWKioom3bepqUn+a+TIkSNIT0/XMxqFqPHFjNNwB4/on7332r6TrfNrpBdf\nfFH1XUK7d+/G7t27J/15Wloadu7cqVU0ADoXA+9xx9jY2Cn3HR0dhcfjAQDEx8frGSusbdq0SV4U\nXMSuvsjj7nPmzEFnZ6fcpuAK9ec81Hx5V1ZWYvfu3bDZbEFI5EvXYSLv/yH+isHixYuxc+dOvPLK\nK1i4cKGescJabm4u0tPTkZ6eLuRTyKH+D3oqcXFxim1RiH5rrNlsVmyLZMOGDYa9t649g9HRUdX7\nfv755/j5z38OSZKwb98+VcNEiYmJM4
kXtsbupBDx8xn/D1qk/4bx9+mLlB0ATp486dMeG+IVxbZt\n2/DGG2/IbdE+f29GZNe1GEx2d4XD4YDJZPL5y3XsLymTyaT6HmcjJnMKdU6nE83NzQCAmpoa4XoH\n44eJRPp/nJqaijNnzshtkbIDE5+AFS3//fffLxeD+++/X7j83sJuorq8vDzYbDa8++67yMvLk7fb\n7XbY7XaffTMyMrB//37s378fCxYs0DNWWBP9biKRV9uqqalRbItC9Oko3nzzTcU2qaNrz8BqtaKk\npGTC9rKysgnbsrOzkZ2drWeciCDyBVjg+i2Bra2tclsk3sOigQyRkjY+/PBDn/YzzzxjYBrx8KGz\nMCPyBVgAWLNmjWJbBKI/wWv0dAhkLBaDMCP6zJkiz+/jvSiMiAvEiH7u0MywGIQZ0acU6OvrU2yL\nYGRkRLEtCtGvGdDMiDebE01pbA3ksbZoRB6qiI2NxeDgoNwWTajfpx/IE7wAFK9XetPjKV6RsRgo\nGJsTR8QvU0DMHsEYkSd7W7x4MT799FO5LZonnnhCngLBiLlx/FHzxT1WAIx4gld0LAYKxhbT/v3v\nf29wkukRtYgBYq9j29DQoNgOJWr/uvY3Nw4Qmn9ZP/jggz53FZF6LAbjOJ1OedZUp9Mp9BeriES+\nPVOE7P6+vI2cG0cLzzzzDIvBNPEC8jhjvYLxbQoOo+d0nwmRr3eMMXJuHDIWewbjjM06Ob4tksrK\nSgBi/sMuLi6WhyeKi4sNTjOR2mEWSZJ4AZOEwmIQhvbs2QNAzGKwYcOGkM7v78t78+bNkCRJ2GEW\nilwcJhonNTVVsS2KyspKDA8PY3h4WO4hiKa4uDgkewVqFBQUGB2BaFpYDMbZsmWLYlsUY39Vj2+L\nZMOGDSHZK1CD8+GQqDhMNE5ubi5uu+02uS2aUH+KNNAHh6bCMXci7bAYKBCxRyAKtV/eJSUlHHcn\nCiIWAwUi9giIiGaC1wzCjMj36RORcVgMwsz8+fMV20REU9F1mMjlcqG6uhpmsxn5+flT3qq5c+dO\nLFq0CJIk4eLFi9i2bZue0aYk8kR1a9eulR/aWrt2rcFpiEgUuvYM6urqUFpaiuLiYtTW1k657/PP\nP4/CwkIUFRUhLS1Nz1h+lZeXC7l+MCD24jBEZBxdi4H30n/+5nePj48HAHR3dxu69q3T6URTUxOa\nmprkHgIRUbjTtRh4r8GrdrEPh8OBO++8U69Ifnn3CETsHYi+0hkRGUPXawbTmcZ3YGAAcXFxqvZN\nTEwM+Pj+REdH+7T1eA89JSQk+LRFy++N2Y3D/MYyIr+uxWCyVascDgdMJtOEC7QejwdRUeo7K263\ne+YhxykqKsKZM2fkth7voaddu3b5tH/3u98ZmGZmRPvsvYmcHWB+oxmRX9dikJeXB5vNBkmSfCbw\nstvtisWgra0NCxcu1DNS2HO5XIptIqKp6FoMrFar4pzuZWVlivtnZGToGUeV8dcMXnrpJQPTEBEF\nBx86G6e/v1+xLQrRp+AmImOwGIzjfQeUd1sUok/BTUTGmPYw0aeffoply5ZpmSUk3HDDDYrtUBHI\nFNAvv/zylD/nFNBENMZvMfjLX/6CuXPnwuPxoLm5GWlpaWhra0NCQkJYFoNNmzbJX6KheJ++mi9v\np9OJl19+mVNAE5FqfovB8uXLsWzZMhw8eBDPPPMMkpOT0dPTg48//jgY+YIuNzcXOTk5cltEouYm\nIuP4LQZjf/17PB4kJycDAJKTkzFr1ix9kxlo+fLlRkcgIgoq1ReQR0ZGfF5fu3ZN8zCh4vTp05zk\njYgiiupikJSUhNraWgwODqK2tlb4x70nw4nqiCgSqS4G69atw+zZs1FVVYWbbroJDz74oJ65DCP6\nRHVERNMR0K2lOTk58sXVcNXX16fYJiIKZ6qLQU9PD95//31YLBYMDQ3hoYceQlJSkp7ZDDE4OKjY\nJi
IKZ6qLwQcffIDS0lJERUXB4/Fg7969KC4u1jObIb777jvFNhFROFN9zWDWrFny9NJRUVHyymTh\nZvx6AEREkUB1MfBejwD4z62lp06d0jaRwUwmk2KbiCicqS4GHo8Hb775Jurr6/HWW28hPj4eTqcT\nn332mZ75go7DREQUiVRfM+jv78eqVasAXF+0JlzNnTsXra2tcpuIKBKoLgabN29WHENPT09He3s7\n0tLSNA1mlC1btsgT1XEKaCKKFKqHiSa7mJqQkIBPP/1U8WculwvvvPMO9uzZo2oJxpaWFlRUVODQ\noUMYHh5WG01TLS0tim0ionCmyeI2ky0CU1dXh9LSUhQXF6O2tnbKY1y+fBn9/f0oKirCY489hpiY\nGC2iBWzv3r2KbSKicKZJMZjsrhuLxSK3Y2NjpzyGw+HAwMAA9u/fj3PnzmkRa1qGhoYU20RE4UzX\nZS+9ewz+ikFraysuXbqEjRs34vz58/B4PHpGm5Toy14SEU3HtJe9VGN0dFT1vtHR0cjPzwcA3HLL\nLXC73bj55pun/J1gzJwq8uysImcHxM4vcnaA+Y1mRH5NisFkf0F7P6jm3XY4HDCZTD4rci1cuBDN\nzc3IysqC2+3G4sWL/b6v2+2eQWplMTEx8sXrmJgYXd4jWETODoidX+TsAPMbzYj8mhSDyW4rzcvL\ng81mgyRJKCgokLfb7fYJxWDp0qUoLy9HY2MjkpOTER2ta6dlUnFxcXIxiIuLMyQDEVGwTfsbt7Oz\nE/PmzQMw+TKRVqsVJSUlE7aXlZUp7h8KC9BzCmsiikSqLyCPX/XLbrdrHoaIiIyhuhicP3/e57XZ\nbNY8TCgYm5l1fJuIKJxN+9vOqFs/9eZ9rcKo6xZERMHm99vu8OHDuHbtGr788ksMDw9DkiSYTCak\npKQEI1/QWSwWeYUz74fmiIjCmd9i8OijjwIADh06hMcee0z3QEbjspdEFIlUDxM9/PDDeuYIGTfe\neKNim4gonKkuBpEyfs7pKIgoEqkuBpWVlWhvb0dLSwveffddfPXVV3rmMszly5cV20RE4Ux1MRga\nGoLVakVDQwM2b96M+vp6PXMZxns+pUDmViIiEpnqYmA2mxEVFYX4+HgA4TtVA4sBEUUi1cVgcHAQ\nfX19cjEIV94P04Xrg3VEROOpLgZWqxUHDhzAfffdh8bGRly4cEHPXIYZGRlRbBMRhTPVtwitXLkS\nK1euBABkZ2cjKytLt1BERBRc05qOIjo62me4qKGhQbNAREQUfJrMxNba2qrFYYiIyCCaFAM+nEVE\nJDZNHis2mUyK210uF6qrq2E2m5Gfn4/U1NRJj3H48GH5Vs7bb79d1bKXehi/7CURUSTQdY6Juro6\nlJaWAgAqKipQVFQ06b7x8fE+S2MahXcTEVEk0rUYeE8BHRsbO+W+IyMj2L9/PwAgIyMD9957r57R\nJsW5iYgoEmlSDCb70vTe7q8YrF+/Xm5XVVVpEYuIiFSa9gXkrq4uub106VLFfaY7nUOkzJBKRBQq\nVH/rnj59Gs3NzYiKioIkSWhvb8fPfvYzANeHdZQMDAwoth0OB0wmE3Jzc+VtLS0t8nHULiqTmJio\nNv60BeM99CJydkDs/CJnB5jfaEbkV10Mzp8/j5KSkoAOnpeXB5vNBkmSfC4O2+32CcXgm2++walT\np2AymXDPPfeoOr7b7Q4oz3QE4z30InJ2QOz8ImcHmN9oRuRXXQxGR0fR0dEhj/2fPn3aZ5xfidVq\nVSwgZWVlE7atXr1abRRdxcfHy72YcJ+Uj4hojOpiMDAwgHPnzskzed5www26hTISby0lokikuhgk\nJCQgPz9fft3d3a1HHsOxGBBRJFJdDL744gt4PB7ExsZCkiR8+eWX+OUvf6lnNkPwCWQiikSqi0Fp\naSnuvPNO+bXD4dAlkN5efPFFtLe3q9p3eHh4yovmaWlp2Llzp1bR
iIgMo7oYeBcCAFi0aJHmYYJB\nzZf3li1bMDw8DJvNFoRERETGm/ZDZ+H8lHBxcbHREYiIgmraxcDj8WiZI6Rs2LDB6AhEREHld5jo\nN7/5DXbs2AGbzYb09HQAkC8gFxYW6h6QiIj057cY/PrXvwYAZGZm+kxBXVFRoV8qIiIKKtXDRCtW\nrPB5PWfOHM3DEBGRMVQXg/GrlHk/gEZERGLTZA1kIiISm+piUFlZifb2drS0tODdd9/FV199pWcu\nIiIKItXFYGhoCFarFQ0NDdi8eTPq6+v1zEVEREGkuhiYzWZERUXJ0zrHxcXpFoqIiIJLdTEYHBxE\nX18f5/gnIgpDqouB1WrFgQMHcN9996GxsREXLlzQMxcREQWR6onqVq5ciZUrVwIAsrOzkZWV5fd3\nXC4XqqurYTabkZ+fP+H21PEGBgbw6quvYuvWrbBarWqjERHRDE3r1tLo6Gif4aKGhgbF/erq6lBa\nWori4mLU1tb6Pe6xY8fw0EMPTScSERHNgCbPGbS2tiput1gscnts7eTJuN1uWCwWXpMgIjKAJsVA\nkiS/2/0Vg5MnT2LVqlVaxCEiogCpvmYwFZPJpLh9dHRU9TE6Oztx6NAhdHZ2IiMjg9cMiIiCSJNi\nMJmBgQHFtsPhgMlkQm5urrxt+/btAACn06l6ErzExESNkhpzfL0xv3FEzg4wv9GMyK9rMcjLy4PN\nZoMkSSgoKJC32+32CcUAAHp7e3Hy5EnVPQO326155mAeX2/MbxyRswPMbzQj8mtSDCa7ZmC1WhUX\nlC8rK1PcPykpCTt27NAiEhERBUCTC8hLly7V4jBERGSQafUM2tracOutt8oXjjMyMrTMREQklOef\nfR497h7Njqc0ojIdyYnJ+NOuP6naV3UxeOWVV/Dkk0/C4XBg9uzZqK+v91kGk4goUvW4e/DWur8Y\nHWOCp6t/onpf1cNEd9xxB9LT0yFJkjwtBRERhQfVxcDj8QC4PpU1ERGFF9XF4LvvvsPRo0dx++23\nA5j8QTMiIhKP6msGGzduRGdnJxYsWIDPP/88oKeLiYim8txzz6O3N/QuwCYlJeO119RdgBWd6mJg\nsViwYMECAMCSJUuwZMkS3UIRUWTp7e1ByY7dRseYwPbGU0ZHCJqAnjPo6elBXV0denq0q+BERGQ8\n1T2DL774Ah0dHVi8eDHq6uqQlpaGu+++W89sRKTST5/7L1zuvajZ8bQaZpmddDNef+3PmhyL9KW6\nGJw7dw6bNm0CAKSlpeG9995jMSAKEZd7LyJu+8+MjjHB5b+9anQEUkl1MYiJifF57W99AiKR/PS5\nZ3G5V7vJwbT7yzoRr7+2S5NjEU1FdTG4du3alK+JRHa51w3zjvVGx5jg8htVRkegCKG6GOTn5+Pt\nt9/GvHnz0NXVhR/84Ad65iIioiBSXQxSU1OxdetWXL16FQkJCXpmIiKiIAt4CuuEhASfVcuIiEh8\nqovBrl27YLfbAQDd3d34+OOPdQtFRETBpXqYyGq1yrOV3nrrrfjXv/7l93dcLheqq6thNpuRn5+P\n1NTUSfe12+3o7OxETEwMcnJy5DmQiIhIf6qLQVSUbyciOtr/r9bV1aG0tBQAUFFRMeX6B97TYldW\nVrIYEBEFkepiMDIy4vN6aGjI7+9YLBa5rea5hJaWFuzduxdZWVlqYxERkQZUF4O7774b+/btQ2Zm\nJi5cuIC77rrL7+9IkiS31RSDjIwMvPDCC7hy5YraWEREpAHVxSArKwtpaWno6OjAI488glmzZvn9\nnelMcx0dHY3GxkasWbPG776JiYkBHz8Qeh9fb8HMX/rjElzs0e4JXkC7p3hvTk7EO/+waXIsI/A8\nNFak5FddDAAgPj4emZmZE7Y3NDRg6dKlE7Z734Lq3XY4HDCZTMjNzZW39fb2IikpCQDQ0dGhKo/b\nre2XT7CPr7dg5r/Y48b9T0v+
dzTA/77lFvr/pcjZAeY3mtr8ARWDybS2tioWg7y8PNhsNkiShIKC\nAnm73W6fUAxqamowODiIkZERToBHRBRkmhQD72sD3qxWq2JXv6ysbMK2xx57TIsoREQ0DQE/gayE\n6yETEYlNk2JARERiYzEgIiJ9rxlQYP7ruZ/iYu9lzY6n2a2ZSbPx59de1+RYRBSaVBeD0dFRmM1m\nxZ8p3UlEgbvYexn//YQm9VlT/7NPuwJFRKFJ9TDRnj17Jv1ZRkaGFlmIiMggqotBd3c39uzZg+PH\nj0+Yp4iIiMSmekziRz/6EebPnw+Xy4VDhw4hNjYWjzzyiJ7ZiIgoSFT3DObNmwfg+vKX6enpcLlc\nuoUiIqLgUt0zOHDgABITE9Hd3Y1ly5Zh27ZteuYiIqIgUl0Mzp07h+3bt2Pu3Ll65iEiIgOoHiZ6\n+umnWQiIiMKU6p5BfHw8+vv7YbFY4HK5MGfOHMTHx+uZjYhIGE9X/8ToCDOiuhi88847WL16NZYs\nWYKEhAR88MEHKCws1DMbEZEw3lr3F6MjTBBIgVJdDG699VYsWbIEAJCQkIDh4eHAkxERTcL2xlNG\nR4hoqovB+GmqY2JiNA9DRJGrZMduoyNMEEkFSvUF5JGREXlCOo/Hg2vXrukWioiIgkt1z+CBBx6A\nzWZDbGwsBgYG8PDDD/v9HZfLherqapjNZuTn5yM1NXXSfZ1OJ86ePQuPx4MVK1YgLS1NbTQiIpoh\n1cVgzpw52Lx5c0AHr6urQ2lpKQCgoqICRUVFk+7b09ODjRs3AgAqKytZDIiIgkjX+ZItFovcjo2N\nnXLfNWvW6BmFKOwN/u1VoyOQwHQtBt6L3vgrBmOOHj2K5cuX6xWJKGzFbf+Z0REmYIESh67FYHR0\nNKD9jx07hqysLKSkpKjaPzExcTqxVNP7+CIR/bNQk3/0jaogJAlcJHz2oSxS8utaDAYGBhTbDocD\nJpMJubm58rYTJ04gJSUFmZmZqo/vdru1CWrQ8UUi+mehJr95x/ogJAnM6BtVEfHZh7JIya9rMcjL\ny4PNZoMkSSgoKJC32+12n2LQ1dWFEydOIDs7G2fPnsXVq1exZcsWPaMREZEXXYuB1WpVXJS9rKzM\n53VKSgp+8YtfzPj9nn/2OfS4e2d8nDFaLSifnJiEP+16TZNjERHpIfRWX5+BHncvdhf+2OgYEzx1\n8B9GRyAimpLqJ5CJiCh8sRgQERGLARERsRgQERFYDIiICCwGRESEMLu1lIz1v2+Z/O9ERCGJxYA0\nc//Tkv+dDMAiReQfh4mIiIjFgIiIWAyIiAgsBkREBBYDIiIC7yYKOf+zb8ToCEQUgVgMQsx/PxF6\n/0tYoIjCX0gNE3k8noDXTSYiopnT9c9Ql8uF6upqmM1m5OfnIzU1ddJ9q6qq8PXXX6OwsBBWq1XP\nWERENI6uxaCurg6lpaUAgIqKChQVFU267/r16+F0OvWMQ0REk9B1mMhiscjt2NhYPd+KiIhmQNdi\nIEn/mauGxYCIKHTpOkyk98XgxMREXY+vJZGyKmF+44icHWB+o6nNr2sxGBgYUGw7HA6YTCbk5ubO\n6Phut3tGvx9MImVVwvzGETk7wPxGU5tf12KQl5cHm80GSZJQUFAgb7fb7ROKwUcffYTm5mbMmjUL\nGRkZWL16tZ7RiIjIi67FwGq1oqSkZML2srKyCdu8iwURkUiSE5PxdPVPjI4xQXJisup9Q+9xVyKK\nOElJybC98ZTRMSZISlL3ZfqnXX/S7D1LSkpgs9k0O55aLAZEZLjXXhP/y1R0ITUdBRERGSPsegZP\nHfyH0RGIiIQTdsVgd+GPjY4wAQsU6W120s24/LdXjY4xweykm42OQCqFXTEgmo7ZSYm4/EaV0TEm\nmJ2k7oGh11/7s2bvyTH3yMRiQATg9dd2aXYsfpmSiHgBmYiIWAyIiIjFgIiIwGJARERgMSAiIr
AY\nEBERWAyIiAgsBkREBBYDIiKCzk8gu1wuVFdXw2w2Iz8/H6mpqZrsS0RE2tK1Z1BXV4fS0lIUFxej\ntrZWs32JiEhbuhYDi8Uit2NjYzXbl4iItKVrMZAkSW77+4IPZF8iItKWrsVgdHRUl32JiEhbJsn7\nT3KNVVRUoKioCABw+PBhPProowAAh8MBk8mE3Nxcv/tOprq6WqfUREThbd26dRO26VoMOjo68Mkn\nn0CSJBQUFCAlJQUA8Ne//hUmkwnbt2/3uy8REelP12JARERi4ENnRETEYkBERBFeDDwej3B3MYmY\n2Zvo+YnCla7TUYSyqqoqfP311ygsLITVap1y31CZKiOQzHa7HZ2dnYiJiUFOTg5uv/32KbcHQyD5\nAaClpQUNDQ0wm81Yv349YmJisHPnTixatAiSJOHixYvYtm1bEJJP5HQ6cfbsWXg8HqxYsQJpaWmT\n7hsq508gmUPx/AkkPxDa508gn2PQzh8pgjkcDunf//633/0OHjwotw8cOKBnJL/UZvZ25MiRgLbr\nSW3+S5cuSQ6HY8L2a9euye2qqipNswXik08+kdv+PsdQOX8CyewtVM6fQPKH+vnjLVTOn4geJlJL\n1KkyWlpa8Ic//AEDAwOqtocSh8OBgYEB7N+/H+fOnZO3x8fHAwC6u7sxd+5co+JhzZo1qvcNlfMn\nkMxA6J0/geQP9fMHUP85Buv8idhhokBIgk6VkZGRgRdeeAFXrlxRtT2UtLa2Yt68edi4cSOOHDmC\nrKwsREX9528Xh8OB++67z8CE1x09ehTLly+fcp9QO3/UZAZC9/xRk1+E80ft5xis84c9AxVEvuAZ\nHR2NxsZG1dtDRXR0NPLz8wEAt9xyC9xut8/PBwYGEBcXZ0Cy/zh27BiysrL8PiAZSueP2sxjQu38\nUZtfhPMHUPc5Buv8YTEYx+FwwOl0+mzz7saF4tCKUube3l653dHR4Xe7kZTyL1y4EM3NzQAAt9uN\n2bNnyz/zeDw+f+UZ4cSJE0hJSUFmZqbP9lA+fwLJHIrnTyD5Q/38mexzNPL8idhhoo8++gjNzc2Y\nNWsWMjIysHr1agDXr/KPnzcpLy8PNptNnirDKIFkrqmpweDgIEZGRnD33Xf73R5q+ZcuXYry8nI0\nNjYiOTkZ0dH/OVXb2tqwcOHCoGb31tXVhRMnTiA7Oxtnz57F1atXsWXLFgChe/4EmjnUzp9A84fy\n+QNM/jkaef5wOgoiIuIwERERsRgQERFYDIiICCwGREQEFgMiIgKLARERIYKfMyCazCeffIL29nak\npaVh7dppj4/+AAADV0lEQVS1RschCgoWA6Jx1q5di56eHjgcDqOjEAUNiwGFrdOnT+P999/HU089\nBbvdDovFgtzcXCQnJ+Po0aOwWCwYGRmB1WrFihUrfH53smcxjx8/ju7ubsTExGBwcBDr16/HjTfe\niDNnzmD//v1Yt24dVq1aJb9+4IEHsHr1anR1deHYsWOIjY3F8PAwsrOzcddddwEATp06haNHjyIz\nMxMJCQkYHh7Gxo0b5ff86quv8MUXXyA6OhoejwcdHR149tlnAQDnzp1DQ0MD4uPjMTg4iOXLl2PB\nggU6faIU1nSbHJsoBOzdu3fCvPV///vfpeHhYfl1TU2N1NLS4rNPV1eXdOzYMZ9tTqdTOnnypPx6\naGhIeu+99+TXx48fl9rb2+Wf7du3b9Jc//znP31ev/7663L7s88+k86fPy9J0vX598vLy332ramp\nkSRJkvr6+qTdu3f7/Oy9997z+W8jUos9AwprJpMJDz30kM82l8uFI0eOyK89Hg9iYmJw2223TXms\nCxcuYHBwEAcPHgRwvffQ19cn/3zlypXYt28fnnzySdTU1GDVqlXyz7q7u3HixAmYzWZERUXh0qVL\nPse++eab5XZ8fDyGhoYAXJ/EbPzEbCtXrgQAdHZ24sqVKz
55BgYG0Nvbq3pWUqIxLAYU1iSF4Z5F\nixbhkUce8Zm8bGRkxO/v3nXXXfj22299hpS8f89sNiMhIQFXrlxBd3e3zxfyBx98gM2bN8uzZe7Z\ns8dvTgCYP38+6uvrcc8998jbvvnmG8ydOxdpaWlITU1FYWGh/DOPx6N4HCJ/OFEdha3KykrU19fL\nY/NLlixBRkYG+vr68OGHH8JsNkOSJPT39+N73/seFixYAJfLhdraWvT19aGrqwuZmZnIysrCnXfe\nCQCor69HW1sboqKiMDo6CpPJhKKiIvk9+/r68Mc//hGFhYU+M09+/PHHuHz5MkwmE4aHh3HhwgU8\n+OCDWLZsGY4dO4aTJ09ix44dSElJwe7du+HxeLB161YAQFNTE86cOYPY2FiMjIwgKSkJDzzwAIDr\nheHUqVNyYbt69Soef/xxJCQkBOUzpvDBYkBERHzojIiIWAyIiAgsBkREBBYDIiICiwEREYHFgIiI\nwGJARERgMSAiIgD/BzDpWwxOnriKAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10ce77b00>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='relevance', y='cv_cos_sim_st_pd', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x111ed0550>"
      ]
     },
     "execution_count": 134,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEPCAYAAACgFqixAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XtQVOfdB/Dv7nKTEG9BxA0qErS6XlMdqtYL9ZJ4SQND\nrFVpNGPUt5O8+aOdZDqTt0kn47wznepMpzV10vStTWJabAU1NVIvIfXKOMYgXnZVjEYoRW6yYgUX\ngT3vHwwnu3Bgz8I5++yzfD9/PXs47P5cD+d7znOe8xyLoigKiIhoQLOKLoCIiMRjGBAREcOAiIgY\nBkREBIYBEREhBGHg9XrR3t5u9scQEVE/RJn55ocPH8bXX3+NrKws2O32Xtetrq5GUVERbDYbMjMz\nkZycbGZpRETkw2L2fQYulwtDhw4NGAZ///vf8fzzzwMADhw4gOzsbDPLIiIiH2FzzSA+Pl5tx8TE\nCKyEiGjgCZsw8D1BYRgQEYVW2IQBLzITEYlj6gXknjidTlgsFjgcDnWZx+PRbPekqKjIlNqIiCLd\n4sWLuy0zNQw+++wz3Lp1C4MGDUJqairmz58PACguLu4WBhkZGcjLy4OiKFiyZImu9//2t79tSt1E\nfeFyubB161YAwFtvveW3fZP5+P3rU1JSornc1DDoaae+efPmbsvsdjvWrl1rZjlEpnI4HJg0aZLa\nptDi998/QrqJiCLVqlWrRJcwoPH77zuGAZGBeEQqFr//vgub0URERCQOw4CIiBgGRETEMCAiIjAM\niIgIDAMiIgLDgIiIwDAgIiIwDIiICAwDIiICw4CIiMAwICIiMAyIiMKGy+WCy+US8tkMAyKiMJGf\nn4/8/Hwhn80wiEAijy6IqG9cLheuXr2Kq1evCvn7ZRhEIJFHF0TUN75/syL+fhkGEUb00QURyYlh\nEGFEH10QiSRzF6nvIztFPL6TYUBEEUPmLlKHw4FJkyZh0qRJQh7fyTCIMKKPLohEiYQu0lWrVgn7\nu2UYRBjRRxdEokRCF6nD4RD2dxsl5FPJVDwjIKJg8cwgAok8uiC58QKsWIWFhSgsLBTy2TwzICJV\nZ/fK22+/LbiS4HV2kXa2ZVRQUAAAWLFiRcg/m2FARAC+uQDb2ZZxhyrrGQHQcVbQ3NystkMdCOwm\nIiIAvAArWudZQdd2qDAMiIiIYUBEHSLhAqzMXnjhBc12qPCaAREB6OhiGTNmjNqWUedIHBEXYPsr\nNTVVsx0qPDMgIpXFYoHFYhFdRp8VFBQI6W83guhrNgwDIgPJPE7f5XKhvLwc5eXlUv4bOkfjNDc3\nCxurLzOGAZGBZJ4oTfSRaX+JHo3TX6Kv2fCaAZFBImGcPokj+qY5nhkQGUT2I2vRR6b99d3vflez\nLZNZs2Zh1qxZQj6bZwZEBED8kWl/Xb9+XbMtk/PnzwOIwOkoqqurUVRUBJvNhszMTCQnJ/e47u3b\nt3H+/Hl4vV7Mnj1bHeJGJItVq1Zh69ataltGstYNAPX19ZptWYjuZjS1m+jcuXPIzc3FmjVrcPbs\n2V7XvXr1KlatWoXVq1fj8uXLZpYVkMwjQkgch8OB+Ph4xMfHS3lkDcg9ncOIESM027IQ3c1oahjE\nx8er7ZiYmF7XbW9vh9frhdfrRVxcnJllBSTziJBIIGsYu1wudWijjPUDYqdQ7q/169drtkkfU8NA\nURS1HSgMpkyZgm3btmH79u0YP368mWX1KhIenSc7WcNY9JGdEWS+acvhcCA2NhaxsbFSnt34XjgW\ncRHZ1GsG7e3tute9ePEifvazn0FRFBQUFOi6ZjBs2LD+lKfpwIEDfm0ZRyVcvHgRADB9+nTBlQTv\n4sWLar9pRUWFVP+Gmpoav7YZ26eZ9u3bp06h/M9//hM5OTmCKwrOxYsX0dLSAkC+bQcAjh075tfO\nzc0N6eebGgYej0ez7XQ6YbFY/NI7Nj
YWQMft8IMHD9b1/m6326BKv9HW1ubXNuMzzLZz504AwC9/\n+UvBlQTvgw8+8GvL9JAV323F7XZLt+18/PHHfu3vfe97AqsJnszbDtD9YCLU24+pYZCRkYG8vDwo\nioIlS5aoy4uLi7uFQWpqKvbt2wcAmDp1qpll9Ur2ESGdUwp0tmU8XZaVxWJRu0Zlnt+HBiZTrxnY\n7XasXbsW69atQ1JSkrp88+bN2LRpk9+6EydORE5ODnJycoReM+gcaz1p0iQpd6QfffSRZlsWMt/4\n9PTTT2u2ZSF6CuX+Et3n3l+++0jfdqjwDmQNq1atkm5H1Kmurk6zLQuZw7izv71rWxYrVqxQh8bK\nOAX0iRMnNNuyeOaZZzTbocI7kDXIthPylZiYiIqKCrUtIxmP6iKFjGcEnbr2ucum8+7jzjafgUz9\nsmHDBs22TM6fP+/3hyEL2bspgI5rdyIerGIEr9er2SZ9GAYaZL3pCfjmaVVjxoyR8gxH5vs8uh7Z\nyUjWezwAwGq1arZlIfp6mXzfWAjI/AcBdJwRyHpWEAk3bslK5iAGgJEjR2q2ZeFwODB27FiMHTuW\nU1iHA9n/IAC555eRmegju/6SPYgXLlyo2ZaJoih+MzeEEsOgC9n/IGQn+w6VxJF9NJHL5UJFRQUq\nKiqEHIgyDCisyDy0VPYDCdmDWPYprEVvP30OAxmTVw/Z/yAiQedYd9nIfp+Bw+FAdHQ0oqOjpQti\nQP4prO/cuaPZDpU+h8G9e/eMrCNsyHxkGikuXLiACxcuiC4jaL59vaL6ffvD5XKhtbUVra2tUl4v\nW7BggWZbFo2NjZrtUAl409lHH33U7ShNURRcv34dWVlZphUmEs8IxNm1a5c6RnzXrl3YuHGj4Ir0\ne+yxxzTbsvj973/v1/7Nb34jsJrg7d27168t413UIgUMg8GDByM7O7vbct+pniMNzwjEKSoq8mvL\nFAayT3JYW1ur2ZZFT7Mkkz4Bu4l6mjROxps6iIjClehuxoB79MmTJ2suX7ZsmeHFEKWnp2u2ZfDu\nu+9qtolk0OfD+0CPsSTqi8rKSs22DLo+3IYoGKKn09A9a2lDQwMKCwsRHx+PlpYWLF++HEOHDjWz\nNiIi3RISEvDgwQO1LRubzaYOnrDZbCH/fN3xc+TIEaxduxY5OTlYvXo1Dh8+bGZd1A+FhYUoLCwU\nXUafyPyAFdFHdgPd8OHDNduyaG1t1WyHiu4tdtCgQWpa2Ww2xMXFmVYU9U9BQQEKCgpEl9EnK1as\nQGxsLGJjYzk0kIIi+9Be0XSHQUtLS6+vKTwUFhaiubkZzc3N0p4dyHoHsux3wMouEp4nIVLAMOic\n23/QoEHYtWsXSkpK8Mc//pEP/A5TvmcEMp4duFwuuN1uuN1u6e6CFf3YwoHu4MGDmm3SJ2AYnD59\nGkDHML/Zs2cjLi4Oc+bMidjpKEisDz/8ULMtg5MnT2q2ZeF7gCfjwZ7vPon7p+AFHE20bt06zSvz\nY8aMUduVlZVISUkxtjLqkxdeeAG7d+9W27KReebJqqoqzbYsrFYr2tvb1TYNLAH/x3saouW7/Msv\nvzSuIuqXFStWqDNPyngB9vHHH9dsy0D0aJD+En0HLIllSPxzwyGjxMbGarbJfHyg/MBmSBjI2L8Y\nqQoLC9VpiGUcTSR7vzWRrNgxGGFkH03ErgpxfO96FXEHLInFMKCw4nv/Cu9lCa3Oi8dd2zQw8JpB\nhJF5OgegYw4srTYRmcuQMOCw0vCxYsUKWK1WWK1WKUcT8eiUSIw+h0FNTY3a5q3f4cPlcsHr9cLr\n9Up3By8Av2koZJySgkhWusOg646luLjY8GLCRecUHDLKz8/XbMvi4cOHmm0iMpfuMLhx44bf60ge\nbZCfny/ljjQSsJuISIw+dxNF6k0pLpcLV69exdWrV6U8O/C9fiPjtRwObyQSI+DcRJ9++ikePnyI\n69
evo7W1FYqiwGKxICkpKRT1hVzXbpa3335bYDXB6zpZ2saNGwVWE7zBgwerj4wcPHiw4GqIBo6A\nYfDcc88B6JgS9vvf/77pBYkm+9DGR48eabZl0fnYwq5tIjKX7m6i5cuXm1lH2Lh7965mWxZRUVGa\nbVnIPtkbkax07y36smOprq5GUVERbDYbMjMzkZyc3Ov6t2/fRmlpKWw2G5YtW4bo6OigP7O/2tra\nNNuysNvtKC8vV9tERHro3sMXFhZi2rRpaGtrQ3FxMTIyMpCent7r75w7dw65ubkAgAMHDiA7O7vH\ndRsbG9Hc3NzrOqEQHR2tHpGKCKP+SkxMVMMgMTFRcDXBs1qt6uAEzqlPFDq6/9oePXoEu92O0tJS\nrFu3DiUlJQF/x/emoZiYmF7XdTqd8Hg82LdvH8rKyvSWZbg1a9ZotmXh+2wJGZ8zwWmUicTQHQY2\nmw1WqxVxcXEA9M017ztnUaAwKC8vx71795CTk4MbN24I2xHI/nAYIqK+0N1N1NLSgqamJjUM9Ajm\npqGoqChkZmYCAJ588km43W488cQTun/fSDKeERAR9YfuMLDb7di/fz9+8IMf4MqVK7h582bA3/F4\nPJptp9MJi8UCh8OhLhs/fjxu3bqF9PR0uN1uTJkyJeD7Dxs2TG/5Qem8ziGjUaNG4c6dO2rbrO8o\nVGSuX+baAdYvWqjr1x0Gc+fOxdy5cwEAEydODHjxGAAyMjKQl5cHRVGwZMkSdXlxcXG3MJgxYwby\n8/Nx5coVJCYm6hq91HlzktE6nxAmYzfRpk2bsHXrVrVt1ncUKjLXL3PtAOsXLdT192kgelRUlN/O\nurS0FDNmzOi2nt1ux9q1a7st37x5s+b7rlq1qi/lGK7zCWEyhgERUV8YMnavcyhjJCgsLERzczOa\nm5ulfIbw+++/r9kmIuoNn3TWhezPEK6trdVsExH1xpAwsFgsRrxNWJB9CmU+UJ6I+oK3eHbBJ20R\n0UDEMOiiublZsy0L37O0SDpjIyJz8ZpBF7JPhzB06FDNNhFRb/ocBr4XJ7WGlcrKd3I0GSdK851c\nT8aJ9ohIDN33GZw/fx63bt2C1WqFoiiorKzET37yEwBAamqqWfWF3NChQ1FTU6O2ZdPY2KjZJiLq\nje4wuHHjhuYNZJHGd+6lYOZhCheyd3MRkRi6+0Ha29tRVVWF+vp61NfX4/Dhw2bWRX0k+9BYIhJD\n95mBx+NBWVkZbDYbAOCxxx4zrSiRZB+nzzMDIuoL3WGQkJCgTjENAHV1dWbUIxyHZhLRQKQ7DC5d\nugSv14uYmBgoioLr16/j5z//uZm1CSH7mQERUV/oDoPc3FxMnjxZfe10Ok0pSLSensFARBTJdF9A\n9g0CAJgwYYLhxYSDhoYGzTYRUSTr811VkTqaqK2tTbNNRBTJ+vRwG4AjVWhgeuONN1BZWalr3UD3\n5aSkpGDbtm1GlEXUbwHD4J133sGWLVuQl5eHMWPGAIB6ATkrK8v0AonCSW8779deew319fUAgMTE\nROzYsSNUZenGMKOeBAyDX/ziFwCAtLQ0ZGdnq8sPHDhgXlUCWSwWdRQRh5ZSMHbs2KHuQMMxCIDe\nw2z79u348ssvAQAzZ87E66+/HqqydAkmyACGWbB0dxPNnj3b77WM8/bowaGl1B+JiYnq2YFsXn/9\ndXUHGm5BAPQeZEDHgWtZWRmAjgEu77zzTijKihi6LyAnJyf7vfa9AY2IOoTrGYFeM2fOFF1Cn/nu\n/BkEwZNvjmYiMk04nhEEI1KHvIeC7jAoLCxEZWUlbt++jb/85S/46quvzKxLGNmfZ0A0kPGMoO90\n7+0ePXoEu92O0tJSrFu3DiUlJWbWJUznRHxd20REkUx3GNhsNlitVnWO/9jYWNOKEolTQBPRQKQ7\nDFpaWtDU1CTlA1+CwSmgiWgg0h0Gdrsd+/fvx5w5c3DlyhXcvHnT
zLqIiCiEdN9nMHfuXMydOxcA\nMHHiRKSnp5tWFBERhVafhstERUX5dReVlpYaVhAREYWeIWMny8vLjXibsMChpUQ0EPV51lJfnLaB\n9DJyfhnOLUNkHEPCIJImdIuPj8eDBw/UNhkr0M7bNyy4sycKHfaDdNHS0qLZptDw3fkzCIhCh2HQ\nBZ90Jl5KSoroEogGHEPCIJKuGfh2eUVS95dMeEZAFHqGXDOYMWOGEW8TFsL9eQa8AEtEZuhTGFRU\nVGD06NHqkXNqaqqRNQlls9nU7qFwnKgu0M7b5XJh69atAIC33noLDocjFGURUQDhfiCnOwy2b9+O\n1atXw+l0YsiQISgpKfF7DKaW6upqFBUVwWazITMzs9sDcrryeDz49a9/jQ0bNsBut+stzVCydxM5\nHA7YbDa0t7czCIjCSKCdd9edf15enpnldKP7msG3vvUtjBkzBoqiqNNSBHLu3Dnk5uZizZo1OHv2\nbMD1jx8/jmeffVZvSaaIhAvIb775pugSiChIvjv/UAcBEEQYdM7gGUzXie84/ZiYmF7XdbvdiI+P\nFz4rarhfM9CDZwREFCzdYfCf//wHR48exVNPPQVAXxeK7840UBicPn0a8+bN01sOEVHEEXFG0En3\nNYOcnBzU1NRg3LhxuHjxoq4HvwTzcJiamhocPHgQNTU1SE1NFXbNgIhoINIdBvHx8Rg3bhwAYPr0\n6Zg+fXrA3/F4PJptp9MJi8Xi152xadMmAB2jYYYOHaqrpmHDhulaLxhWq1XtErNaraZ8RqjIXDsg\nd/0y1w6wftFE1B/U0NL6+nrcunULaWlpSExMDLh+RkYG8vLyoCgKlixZoi4vLi7uFgYA0NDQgNOn\nT+s+M3C73cGUr0vXJ52Z8RmhInPtgNz1y1w7wPpFE1G/7jC4dOkSqqqqMGXKFJw7dw4pKSmYNm1a\nr79jt9s1x8pu3rxZc/3hw4djy5YteksiIiKD6L6AXFZWhmXLliElJQUrVqzA9evXzayLiIhCSHcY\nREdH+70ONDqIiIjkoTsMHj582OtrIiKSl+5rBpmZmfjwww8xcuRI1NbW4plnnjGzLiIiCiHdYZCc\nnIwNGzbgwYMHSEhIMLMmIiIKsaCfZ5CQkOB3zwAREclPdxjs3LkTxcXFAIC6ujp8/vnnphVFRESh\npTsM7Ha7Olvp6NGj0dDQYFpRREQUWrrDwGr1XzUqypCHpBERURjQHQZd5/Z/9OiR4cUQEZEYug/v\np02bhoKCAqSlpeHmzZuYOnWqmXUREVEI6Q6D9PR0pKSkoKqqCitXrsSgQYPMrIuIiEIoqKGlcXFx\nSEtL6xYEpaWlhhZFREShFfR9BlrKy8uNeBsiIhLEkDCQ9VnBRETUwZAw0PM8ZCIiCl+GhAEREcmN\nYUBERLxmQEREQdxn0N7eDpvNpvmzGTNmGFaQ2d544w1UVlbqXl/rGc6dUlJSsG3bNiPKIiISSncY\n7NmzB7m5uZo/S01NNaoe0wXaee/atQvHjh0DACxduhQbN24MRVlERELp7iaqq6vDnj17cPLkyW7z\nFEUS350/g4CIBgrdZwY//OEPMWrUKFRXV+PgwYOIiYnBypUrzaxNmKVLl6pnB0REA4HuM4ORI0cC\n6Hj85ZgxY1BdXW1aUaLxjICIBhrdZwb79+/HsGHDUFdXh5kzZ+Lll182sy4iIgoh3WFQVlaGTZs2\nYcSIEWbWQ0REAujuJnrppZcYBEREEUp3GMTFxaG5uRkAUF1dDY/HY1pRREQUWrrD4M9//jNu3LgB\nAEhISMCRI0dMK4qIiEJLdxiMHj0a06dPB9ARBq2traYVRUREoaU7DLpOUx0dHW14MUREJIbuMGhr\na1MnpPN6vXj48KFpRRERUWjpHlq6aNEi5OXlISYmBh6PB8uXLzezLiIiCiHdYTB06FCsW7fOzFqI\niEgQPtyGiIgYBkRExDAgIiIE
cc2gL6qrq1FUVASbzYbMzEwkJyf3uK7L5cK1a9fg9Xoxe/ZspKSk\nmFkaERH5MDUMzp07pz4d7cCBA8jOzu5x3fr6euTk5AAACgsLGQZERCFkajdRfHy82o6Jiel13QUL\nFphZChER9cLUM4POm9SAwGHQ6ejRo5g1a5ZZJRFp+vGrr6CxwW3Y+61du9aQ9xkyfBje+93OgOv9\n+NX/RmPDXUM+EzCy/ifw3u/eNeS9yFymhkF7e3tQ6x8/fhzp6elISkrStf6wYcP6UpZuZr+/2Vi/\nfo0Nbti2LAvZ5+nV+P5hXd9DY8NdxG76SQgqCk7j//1aV/25P1qPu/W1hn2uUWH2RGIS/vzxR4a8\nVzBE/O2aGga+01z7tp1OJywWCxwOh7rs1KlTSEpKQlpamu73d7uNO5IT8f5mY/2RQfbvQU/9d+tr\nsXbL7hBUE5y8918U8v2L+ExTwyAjIwN5eXlQFAVLlixRlxcXF/uFQW1tLU6dOoWJEyfi2rVrePDg\nAdavX29maURE5MPUMLDb7Zqna5s3b/Z7nZSUhDfffNPMUoiIqBemhgENHK+8+mO4GxoNfU+j+n2H\nDR+Cnb97z5D3IopUDAMyhLuhEd97SQm8ogD//MDYkCKKRJyOgoiIeGYQTv771R/jroFdLYYNrxs+\nBO+ym4WoR6+98hrq3fWGvZ9Rf7uJwxKxY+cOXesyDMLI3YZG/M8L4fdf8r8F7GYh6k29ux4fLP69\n6DK6eanov3Svy24iIiJiGBAREcOAiIjAMCAiIjAMiIgIDAMiIgLDgIiIwDAgIiIwDIiICBF2B/Jr\nr7yKeneDYe9n3C3hw7Fj5+8MeS8iIjNEVBjUuxuwO+tHosvo5sVPPhZdAhFRr9hNREREDAMiImIY\nEBERGAZERASGARERgWFARERgGBARERgGREQEhgEREYFhQEREYBgQEREYBkREBIYBERGBYUBERGAY\nEBERGAZERASGARERgWFARERgGBARERgGREQEIMrMN6+urkZRURFsNhsyMzORnJxsyLpERGQsU88M\nzp07h9zcXKxZswZnz541bF0iIjKWqWEQHx+vtmNiYgxbl4iIjGVqGCiKorYD7eCDWZeIiIxlahi0\nt7ebsi4RERnLovgekhvswIEDyM7OBgB8+umneO655wAATqcTFosFDocj4Lo9KSoqMqlqIqLItnjx\n4m7LTA2DqqoqnDhxAoqiYMmSJUhKSgIA/OEPf4DFYsGmTZsCrktEROYzNQyIiEgOvOmMiIgYBkRE\nNMDDwOv1SjeKScaafcleP1GkMnU6inB2+PBhfP3118jKyoLdbu913XCZKiOYmouLi1FTU4Po6GhM\nmjQJTz31VK/LQyGY+gHg9u3bKC0thc1mw7JlyxAdHY1t27ZhwoQJUBQFd+/excsvvxyCyrtzuVy4\ndu0avF4vZs+ejZSUlB7XDZftJ5iaw3H7CaZ+ILy3n2C+x5BtP8oA5nQ6lX//+98B1/vkk0/U9v79\n+80sKSC9Nfs6dOhQUMvNpLf+e/fuKU6ns9vyhw8fqu3Dhw8bWlswTpw4obYDfY/hsv0EU7OvcNl+\ngqk/3LcfX+Gy/QzobiK9ZJ0q4/bt2/jVr34Fj8eja3k4cTqd8Hg82LdvH8rKytTlcXFxAIC6ujqM\nGDFCVHlYsGCB7nXDZfsJpmYg/LafYOoP9+0H0P89hmr7GbDdRMFQJJ0qIzU1FT/96U9x//59XcvD\nSXl5OUaOHImcnBwcOnQI6enpsFq/OXZxOp2YM2eOwAo7HD16FLNmzep1nXDbfvTUDITv9qOnfhm2\nH73fY6i2H54Z6CDzBc+oqChcuXJF9/JwERUVhczMTADAk08+Cbfb7fdzj8eD2NhYAZV94/jx40hP\nTw94g2Q4bT96a+4UbtuP3vpl2H4Afd9jqLYfhkEXTqcTLpfLb5nvaVw4dq1o1dzQ0KC2q6qqAi
4X\nSav+8ePH49atWwAAt9uNIUOGqD/zer1+R3kinDp1CklJSUhLS/NbHs7bTzA1h+P2E0z94b799PQ9\nitx+Bmw30WeffYZbt25h0KBBSE1Nxfz58wF0XOXvOm9SRkYG8vLy1KkyRAmm5jNnzqClpQVtbW2Y\nNm1awOXhVv+MGTOQn5+PK1euIDExEVFR32yqFRUVGD9+fEhr91VbW4tTp05h4sSJuHbtGh48eID1\n69cDCN/tJ9iaw237Cbb+cN5+gJ6/R5HbD6ejICIidhMRERHDgIiIwDAgIiIwDIiICAwDIiICw4CI\niDCA7zMg6smJEydQWVmJlJQULFy4UHQ5RCHBMCDqYuHChaivr4fT6RRdClHIMAwoYp0/fx7/+Mc/\n8OKLL6K4uBjx8fFwOBxITEzE0aNHER8fj7a2NtjtdsyePdvvd3u6F/PkyZOoq6tDdHQ0WlpasGzZ\nMjz++OO4fPky9u3bh8WLF2PevHnq60WLFmH+/Pmora3F8ePHERMTg9bWVkycOBFTp04FAHzxxRc4\nevQo0tLSkJCQgNbWVuTk5Kif+dVXX+HSpUuIioqC1+tFVVUVXnnlFQBAWVkZSktLERcXh5aWFsya\nNQvjxo0z6RuliGba5NhEYeBvf/tbt3nr//SnPymtra3q6zNnzii3b9/2W6e2tlY5fvy43zKXy6Wc\nPn1aff3o0SNl79696uuTJ08qlZWV6s8KCgp6rOuvf/2r3+v33ntPbV+4cEG5ceOGoigd8+/n5+f7\nrXvmzBlFURSlqalJ2b17t9/P9u7d6/dvI9KLZwYU0SwWC5599lm/ZdXV1Th06JD62uv1Ijo6GmPH\nju31vW7evImWlhZ88sknADrOHpqamtSfz507FwUFBVi9ejXOnDmDefPmqT+rq6vDqVOnYLPZYLVa\nce/ePb/3fuKJJ9R2XFwcHj16BKBjErOuE7PNnTsXAFBTU4P79+/71ePxeNDQ0KB7VlKiTgwDimiK\nRnfPhAkTsHLlSr/Jy9ra2gL+7tSpU3Hnzh2/LiXf37PZbEhISMD9+/dRV1fnt0M+cuQI1q1bp86W\nuWfPnoB1AsCoUaNQUlKCp59+Wl32r3/9CyNGjEBKSgqSk5ORlZWl/szr9Wq+D1EgnKiOIlZhYSFK\nSkrUvvnp06cjNTUVTU1NOHbsGGw2GxRFQXNzM77zne9g3LhxqK6uxtmzZ9HU1ITa2lqkpaUhPT0d\nkydPBgCUlJSgoqICVqsV7e3tsFgsyM7OVj+zqakJv/3tb5GVleU38+Tnn3+OxsZGWCwWtLa24ubN\nm1i6dCnVtmAbAAAAf0lEQVRmzpyJ48eP4/Tp09iyZQuSkpKwe/dueL1ebNiwAQBw9epVXL58GTEx\nMWhra8Pw4cOxaNEiAB3B8MUXX6jB9uDBAzz//PNISEgIyXdMkYNhQEREvOmMiIgYBkREBIYBERGB\nYUBERGAYEBERGAZERASGARERgWFAREQA/h8WKNOvRMn3ugAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x111ecc8d0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='relevance', y='cv_cos_sim_st_bl', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### TF-IDF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import TfidfVectorizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "TfidfVectorizer(analyzer='word', binary=False, decode_error='strict',\n",
       "        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',\n",
       "        lowercase=True, max_df=1.0, max_features=1000, min_df=1,\n",
       "        ngram_range=(1, 3), norm='l2', preprocessor=None, smooth_idf=True,\n",
       "        stop_words='english', strip_accents=None, sublinear_tf=False,\n",
       "        token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b', tokenizer=None, use_idf=True,\n",
       "        vocabulary=None)"
      ]
     },
     "execution_count": 136,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tiv = TfidfVectorizer(ngram_range=(1, 3), stop_words='english', max_features=1000)\n",
    "tiv.fit(df['search_term'] + ' ' + df['product_title'] + ' ' + df['product_description'] + ' ' + df['bullet'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tiv_of_st = tiv.transform(df['search_term'])\n",
    "tiv_of_pt = tiv.transform(df['product_title'])\n",
    "tiv_of_pd = tiv.transform(df['product_description'])\n",
    "tiv_of_bl = tiv.transform(df['bullet'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### TF-IDF based Cosine Similarity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tiv_cos_sim_st_pt = [cosine_similarity(tiv_of_st[i], tiv_of_pt[i])[0][0] for i in range(tiv_of_st.shape[0])]\n",
    "tiv_cos_sim_st_pd = [cosine_similarity(tiv_of_st[i], tiv_of_pd[i])[0][0] for i in range(tiv_of_st.shape[0])]\n",
    "tiv_cos_sim_st_bl = [cosine_similarity(tiv_of_st[i], tiv_of_bl[i])[0][0] for i in range(tiv_of_st.shape[0])]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['tiv_cos_sim_st_pt'] = tiv_cos_sim_st_pt\n",
    "df['tiv_cos_sim_st_pd'] = tiv_cos_sim_st_pd\n",
    "df['tiv_cos_sim_st_bl'] = tiv_cos_sim_st_bl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sns.boxplot(x='relevance', y='tiv_cos_sim_st_pt', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sns.boxplot(x='relevance', y='tiv_cos_sim_st_pd', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sns.boxplot(x='relevance', y='tiv_cos_sim_st_bl', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Jaccard Similarity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def jaccard(A, B):\n",
    "    C = A.intersection(B)\n",
    "    return float(len(C)) / (len(A) + len(B) - len(C))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df['jaccard_st_pt'] = df.apply(lambda x: jaccard(set(x['tokens_search_term']), set(x['tokens_product_title'])), axis=1)\n",
    "df['jaccard_st_pd'] = df.apply(lambda x: jaccard(set(x['tokens_search_term']), set(x['tokens_product_description'])), axis=1)    \n",
    "df['jaccard_st_br'] = df.apply(lambda x: jaccard(set(x['tokens_search_term']), set(x['tokens_brand'])), axis=1)\n",
    "df['jaccard_st_bl'] = df.apply(lambda x: jaccard(set(x['tokens_search_term']), set(x['tokens_bullet'])), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1954d6748>"
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEPCAYAAACgFqixAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3W1QVOfZB/D/8hZcjQgiUkIVCTJKNdJgiTFqqJhq0USq\naERjkvE1k6Qfnk6cTvKMcVKn06Zm5plpopPYau3YiKlgTVS0VlIUodb3irvgSxSNsagIaBRBYM/z\nwdmTXTy4Z+Gcvc+9/H+fLg6Hs1c2x732fjn3bVMURQEREfVoIaITICIi8VgMiIiIxYCIiFgMiIgI\nLAZERIQAFAOXy4X29nazX4aIiLohzMyL7969GxcuXMD06dORkJDw0HNra2tRUlKC0NBQZGVlIT4+\n3szUiIjIg83s5wycTif69evnsxh88cUXeOGFFwAA27ZtQ25urplpERGRB8uMGdjtdjWOiIgQmAkR\nUc9jmWLg2UBhMSAiCizLFAMOMhMRiWPqAHJnHA4HbDYb0tLS1GPNzc2acWdKSkpMyY2IKNhlZ2c/\ncMzUYrB3716cP38evXr1QlJSEsaPHw8AqKioeKAYZGZmoqCgAIqiYNKkSbqu/+STT5qSN1FPVFxc\njI0bNwIA5s+fj5ycHMEZ+cfpdGLlypUAgOXLl3t9vshg2bJluHz5MgAgMTERq1atMuV1jh07pnnc\n9NlEZikpKWExIDLYwoULAQDr1q0TnEnX/OpXvwIAvPvuu4Iz6Zr8/HwAQEFBgWmvcezYscC3DIhI\nLjNnzhSdQrfk5eWJTqFbEhMThb02iwERqWTrGupItq6hjszqGtLDMrOJiIhIHBYDIiJiMSAiIhYD\nIiICiwEREYHFgIiIwGJARERgMSAiIrAYEBERWAyIiAgsBkREBBYDIiICiwEREYHFgIiIwGJARERg\nMSAiIrAYEBERWAyIiAgsBkREBBYDIvJQXFyM4uJi0Wn0WCLf/zAhr0pEllRUVAQAyMnJEZxJzyTy\n/WfLgIgA3P9W2tTUhKamJrYOBBD9/rMYEBGA776VdowpMES//ywGRETEYkBE982cOVMzpsB45pln\nNONAYTEgIgD3By3tdjvsdjsHkAW4fPmyZhwonE1ERCq2CHoutgyISJWTk8NWgSB5eXmacaCwZUBk\nIKfTCQBIS0sTnEnXMH9x0tLSEB0drcaBxpYBkYEKCwtRWFgoOo0uY/5iNTY2orGxUchrsxgQGcTp\ndKKqqgpVVVXqN1SZMH+x1q9fD0VRoCgK1q9fH/DXZzEgMojnN1IZv50yf7FKSko040BhMSAiIhYD\nIqOIng3SXcxfrOzsbM04UDibiMggaWlpGD58uBrLhvmLtWDBArV7aMGCBQF/fVOLQW1tLUpKShAa\nGoqsrCzEx8d3em5NTQ2OHDkCl8uFMWPGYNCgQWamRmQKGb+RepI9//b2dtEpdEtkZKSw1za1GBw6\ndAjz5s0DAGzbtg25ubmdnltVVaXeiDt37mQxICnJ+I3Uk+z5nzlzRnQK3dLU1CTstU0dM7Db7Woc\nERHx0HPb29vhcrngcrmEVkciktMHH3ygGcti2bJlmnGgmNoyUBRFjX0VgxEjRmDVqlWw2WyYM2eO\nmWkRURA6evSoZiyLoF6ozp/+u//85z/45S9/CUVRUFRUpKubyP3oNhFRR7J/PgQ6f1OLQXNzs2bs\ncDhgs9m8+icfeeQRAIDNZkPfvn11Xb+hocGgTIlIdhkZGWqLICMjQ7rPh8TERLVFkJiYGPD8TR0z\nyMzMREFBATZt2oTMzEz1eEVFBSoqKrzOTUpKwtatW7F161YMGTLEzLSIKAi99dZbmrEsVq1apRkH\niqktg4SEBOTn5z9wfPHixQ8cGzZsGIYNG2ZmOkQU5DIyMkSn0GUdB8ADXdD40BkRBQ0ZWwRuogfA\nuRwFERGxGBARWYFnF5eI7i4WAyIDOZ
1OKdfSd1u6dCmWLl0qOo0uW7FiBVasWCE6jS4RPQDOMQMi\nA7nX0X/33XcFZ9I1t27dEp1Ct8i+HIVIbBkQGUT2nbY8WwQytg48WwQytg48Z15qzcI0G4sBkUFk\n32nLs1UgYwvBs1XAFoL/WAyIiIjFgMgosu+05bkMjN4lYawkNTVVMyZ9WAyIDOLeaWv48OFS7gvw\nySefaMayeO+99zRjWRQUFGjGgcJiQGSgvLw8KVsFALwGvWUcAGf+3cNiQGSgmpoa1NTUiE6jS2Qf\nAGf+3cPnDIgMVFRUBADIyckRnAmRf9gyIDJIcXExmpqa0NTUhOLiYtHp+C0xMVEzloXs+Z87d04z\nDhQWAyKDuFsFHWNZlJeXa8aykD3/1tZWzThQWAyIiIjFgMgoM2fO1IxlwfzFcm/92zEOFBYDIoPk\n5OTAbrfDbrdLOYAse/6ip2Z214QJEzTjQOFsIiIDyfiN1JPM+YveKay79u7d6xUvWLAgoK/f5WJw\n9epVDBw40MhciKQn4zdqT7LnLzNFUTTjQNHdTdSx2VVRUWF4MkREXSV6p7Duio6O1owDRXcxOHv2\nrNfPoaGhhidDRNRVoncK664333xTMw6ULncTuVwuI/MgA7lbcTIulgYA69evB4CA95kaYdmyZQCA\nVatWCc6ka1599VUAwIYNG4Tm0VUhIfLOifH89yri367PYrBjxw7cvXsXp0+fRmtrKxRFgc1mQ1xc\nXCDyoy6QfevFkpISAHIWg8uXL4tOoVtaWlpEp9At/JLadT7L6LRp0zBr1iyMGjUKeXl5mDVrFvLy\n8oRMfSLfZN96cf369XC5XHC5XGoLQRbuVkHHWBbuVkHHWBYLFy7UjGUhzbaXP/3pT83MgwwieuXD\n7nK3CjrGMvBsFcjYQvBsFcjYQmhqatKMSR/dxSAsLAzNzc04f/48mpubzcyJiIgCTHcxOHXqFL74\n4gvcunULO3bsQHV1tZl5URfJvvVidna2ZiwD2VfNFL0cQnfZ7XbNmPTRXQwqKysxe/ZspKenIy8v\nDydPnjQzL+oi2bdeXLBgAUJCQhASEiLdALLnDCIZZxN5ziCScTbRunXrNGNZiN72UvfU0l69enn/\nYRhXsrAqGVsEnmRrEXiKjY0VnUKXdVzbR8YvEzK2aKxC9yf6t99+C5fLhZCQELhcLnXcYNeuXRxc\nthgZ/xF7kq1F4GnAgAGiU+iyjpMPZJyanJycLDqFbhk+fLiw19ZdDBoaGvD2229j5MiRqKysxOOP\nP44tW7bg9OnTLAZkKFkfmnNP63XHsuUvO9nff9H56x4zSE1Nxfvvv4+XXnoJ77//PpYsWYJZs2Zh\n9OjRZuZHPVBhYaGU02Jln9Yr++QD2d9/0fnrLgZTpkzp9PiJEycMS4h6Npkfmquvr9eMZXHw4EHN\nWBbub9UdY1mIzt+QhTwuXrxoxGWIhH876o7GxkbNWBYyP/BH3WdIMRCx9jaR1Xiu5Cvjqr6i19Mn\nsQwpBjabTfN4bW0tPv30U2zevBm1tbU+r1NTU4Nt27Zh+/btaG1tNSI1kozM/day78Hbr18/zZh6\nBlPXez106BDmzZuHOXPm+OyDvHnzJpqampCbm4vnn38e4eHhZqb2UE6nU7r+6mCRlpaGXr16oVev\nXtLNBsnJyVEfmJNxx7D4+HjNWBayP4HsOa1UxBRTU4uB5/+QiIiIh57rcDjQ3NyMrVu34syZM2am\n5ZOss1mCxd27d3H37l3RaXSJe8VVGcncKgPkb5mJfv9NHTPwPO6rGFy8eBGNjY2YMWMGzp49K+wf\nlMyzWYLBihUrNGMZfPDBB5qxLFauXKkZy2Ljxo2asSxEv/+GFIP09HTN4+3t7bqvERYWhqysLADA\nY489hoaGBiNS85vMs1mCgWerUHQL0V9Hjx7VjIlk0OUFhiorKzFy5EgAQFJSkuY5nktde8YOhwM2\nm8
2rT3jo0KE4f/48UlJS0NDQgBEjRvjMwYxNoz3XXAoLCxOyMTV9R+b3X+bcAeYvWqDz110M9u/f\n77W72YULF9Ri0JnMzEwUFBRAURRMmjRJPV5RUfFAMUhPT0dhYSFOnTqF2NhYXQvhmdF6yM3NRWVl\npRqLaqH0VKmpqWqLIDU1Var3PyMjQ20RZGRkSJW7FuYvVqDz191N1JUnKhMSEpCfn4+5c+d67Zm8\nePFiLFq06IHz8/LykJubi3Hjxvn9WkaRfQlo2b333nuasQw8ZxDJOJto/vz5mrEsli9frhnLIiMj\nQzMOFN3FoOOzBDJui6dXXl6elLMpgoV7aqlsZB9vKioq0oxlIfv7L3rMyWdfzI4dO3D37l2cPn0a\nra2tUBQFNpsNo0aNCkR+QrBFII7T6VSnlcq28qTMg9+A/HsIi17bR3Y+i8G0adMAPDhmQGQGmdfU\n95w9589MOiIr0N1NxEJARBS8TH0Cmchfop/C7I6O05KJZKK7GGzYsAGXLl3C4cOHsWnTJhw4cMDM\nvKiHqqmp0YxlEBISohkTyUD3Hdu/f38MGjQIV69exdy5c3Ht2jUz86IeSuYZLffu3dOMiWSguxi4\n1xlyN3/5zYfMwEFYIjF0f6J/++23+Prrr9G/f//7f8hiQCbgmvpEYuj+RB83bhwqKyvxox/9CCdO\nnMCVK1fMzIt6qJiYGM1YBp4PZna24RORVekuBoMHD1YfsU9PT8drr71mWlLUc8k8mygqKkozloXn\nhlIiN5fqKg7gd48h79jevXuNuAyR1GtD9e3bVzOWRe/evTVjWURGRmrGpI8hk6Fv375txGWIAMjX\nInCTvZvo5s2bmrEsYmNjcenSJTUm/xjSMpDxxifrSktLk65VAMi/B6/sxeyVV17RjEkfdqwRGUTm\n8Q4AyM7O1oxlkZaWhkGDBmHQoEFSfpl47rnnNONAYTHQ4HQ6pd7/uLi4GMXFxaLT6LK5c+di7ty5\notPwm+cHkIwfRgsWLNCMZXLp0iW1q0g2ot9/LqCiwb1ypkwrZnpyP7kr4wYrwHcPOBL1NCJaBG5s\nGXTgdDpRVVWFqqoqKVsHxcXFaGpqQlNTk5StA88WgWytg/z8fM1YFsxfvAULFghrlXW5GFy8eFGN\nZexf7IzsuyXJvLYP4N0qYAuBKHB8dhNt3bpVjV0uF4D7/0jPnTuHt99+GwDQp08fk9IjIgoOy5Yt\nw+XLlw25VmJiIlatWmXItdx8FoPevXtj8uTJKC8vR3h4ONLT03H8+HFER0cbmohV5OXlYeXKlWos\nm5kzZ2Ljxo1qLBubzaa2CGSc3kjUGb0f3vn5+SgoKDA5mwf57CaaPHkyAKC2thaZmZmIiIjAU089\nFbQPmsn8BCxwf9DYbrfDbrdLOYC8adMmzVgGnv+ARfxj7i7m37Ppnk3U8Vuau8soGMnYIvAkY4vA\nE1sERIGnuxi0t7ejrq4OsbGxqKurC+rNO2RsEXiSsUXgSbYWgaeBAweKTqFbBg0aJDqFbgnW7utA\n0F0Mfvazn2H37t24ffs2+vTpI/23ZyKjOZ1OXL16VY1l+1LhdDrVB7Zkzb+hoUGNZctfNN1TS8PC\nwjBt2jTMmTMH06ZN44bfZBpZnwBfu3atZiyLP//5z5qxLD766CPNmPTRXQzKy8vNzINIVVhYKOUz\nHu5WQcdYFp7THo2aAhlI7lZBx5j00V0M6urqvH4+cuSI4ckQyf4EuMw8J4UE8wQR0qa7GNy9e9dr\n0Pibb74xJSErkLWbwk3mhepkfwKcSFa6O/6TkpKwdu1aTJw4EYCczWC9uFAdkfX4+wSvr/WJzHiK\nV2a6i8HJkyfVQhDM3N0U7li2GQnuhercsWwFob29XTMm0vPB7S4AfOjMf7q7iX7yk5+oO1ClpaUh\nPT3dzLyEkb2bQvaF6s6cOaMZE+mRkZEhOgVp6S4GSUlJXj9nZmYa
nQsRUbe89dZbolOQliFLWAcT\n2bcujImJ0YyJiB5G95jBjRs3UFZWpk45q66uxjvvvGNaYqK4F6pzx7KRfa44EYmhuxjs2rULL774\nInbv3q2uZBqsZGwREBF1h+5uoj59+iA8PByKoiAiIgIhIcG7Y6Z7kFxGngNoHEwjIr10f6K3tbUB\ngPrgmZ5lhmtra/Hpp59i8+bNqK2t9Xl+c3MzfvOb3+DKlSt606IOPAfQZBxMs9vtmjERmUt3N9Gw\nYcMAACNGjMBnn32m628OHTqEefPmAQC2bduG3Nzch55fWlpqiS4o99PHsrYO+CFKRP7S3TIYMWIE\ngPtFYfr06XjxxRd9/o3nh1JERMRDz21oaIDdbkdkZKTelEwj60Jpbk1NTeqDZ7Lx3JhHtk16+vbt\nqxkTycDUjn/3XraA72Jw4MABjBs3zsx0dJF9obQVK1ZoxrJw79/cMZbBrVu3NGNZzJ8/XzOmnkF3\nMVizZg0qKioAANevX8eXX37p82/8WU7g6tWr2L59Ow4cOIBTp07p/jujyf4EMp/gpa7yXLpEtmVM\nqPt0jxkkJCRg7NixAIDvf//7+Pe//+3zb5qbmzVjh8MBm83m1Se/aNEiAPe/mffr109XTmZscee5\naU9YWJj02+gxf3GsmPvSpUt1PTDqa5E3ABg8eDA++eQTI9IynBXfe3+IyF93Meg4lVTPTmeZmZko\nKCiAoiiYNGmSeryiouKBYgAA9fX1OHDgAJKSkpCQkODz+mZsYJGbm4vKyko1lm2TjMTERPVhs8TE\nROny70jm/K2Y+29/+1uf5+Tn5+te6M2K/42AdfPSS0T+uouBe2qpm+feBp1JSEjQ/IaxePFizfNj\nYmKwZMkSvSmZIi0tTS18Ms4mevTRRzVjWSxfvhwrV65UYyIKDN1jBk888QSKiopw/PhxFBYWYuTI\nkWbmJUxxcTFcLhdcLpeUG8S4l9/uGMvCXQg6xkRkLt3FICUlBVOnTkVUVBSmTp2qrt8TbGRfApqI\nqCt0dxMBQGRkJJKTk83KhYiIBNHdMigsLMS//vUvAPenLLoHWYPNM888oxnLIjQ0VDMmInoYvx46\ne/rppwEAqampOH36tCkJiSb7EtCxsbGaMZnPcwYOt10k2ejuJuq4TAS/dVrT9evXNWMyht5N2fXM\n0+eG7GQlXZ5a6rnURDDJy8tTZ7HIuK9BeHg4Wlpa1JiMpXdTdrYMSDa6u4ni4uJw+PBh3Lt3D0eO\nHJH+Cb/OuHc6Gz58uJTPGcyePVszlgWXsCYSQ3cxGDt2LKKiorBnzx707t0bP/7xj83MS6i8vDwp\nWwWA/OvLrFu3TjMmInP5NbU0NTUVqampZuViGTK2CNw8+6pl7a5gi4Ao8PwqBp6OHj3KbRXJFGwR\nEAWe7mJQWVmJqqoqNDY2om/fvqipqZGyGOidDaIHZ4P4j+8/kTXpLgbV1dWYPXs2tm/fjueffx5b\ntmwxMy/T6P3wkLWLxer4/hNZk+4BZPc0RZfL5fUzWQsffCKirtBdDNxLVre3t8PlcgXtcwZERD2R\n7m6irKwsAMD48eOxadMmXZvPkBhsERCRv3QXg7i4OADAgAED8NJLL5mWED0cB2CJyAxdnlpKYnAA\nlojM4LMY7Nu3DxkZGfjjH/+IQYMGef2ura0NKSkpePLJJ01LkIiIzOezGLgHipOSkpCbm/vA7wsL\nC1kMiIgk57MYuAeObTab9gXC2NNERCQ73VNL+/Xr59dxIiKSh+6v9c8++6zmcXfLgYiop/r56z9H\nXUOdYdfTszmSHrHRsfhwzYe6zmUfDxFRN9U11GFD9iei03jAqyVLdZ/r1x7IREQUnFgMiIiIxYCI\niFgMiIgILAZERAQWAyIiAqeWEpEFvPHGz1Ffb715+jExsVi9Wt88fdmxGBCRcPX1dchfslF0Gg8o\nWDtfdAoBw24iIiJiMSAiIhYD
IiICiwEREYEDyERB4bU33sTN+huGXc+o2ThRMf3x8eqPDLkWmcvU\nYlBbW4uSkhKEhoYiKysL8fHxnZ7rdDpRXV0Nl8uFMWPGIDEx0czUiILKzfobeGTR/4hO4wE3//h/\nolMgnUwtBocOHcK8efMAANu2bdPcNtOtrq4OM2bMAAAUFxezGBARBZCpYwZ2u12NIyIiHnruhAkT\nzEyFiIgewtRioCiKGvsqBm579uzB6NGjzUqJiIg0mNpN1N7e7tf5paWlSElJQVxcnK7zo6Oju5KW\nbmZf32zMX7/8l+ahoc56A7DRsf1R8JdPDbmWKD3lPvRnV7FA0pu/qcWgublZM3Y4HLDZbEhLS1OP\nlZWVIS4uDsnJybqv39DQYEyigq5vNubvx2vV3UDokikBez29Gtbu5v9HwfTmb9VtL/Xmb2oxyMzM\nREFBARRFwaRJk9TjFRUVXsXg2rVrKCsrw7Bhw1BdXY3bt2/j5ZdfNjM1IiLyYGoxSEhI0GwuL168\n2OvnuLg4vPPOO2amQkRED8GHzsgQr7/xGhrqbxp6TcP63WOisGb1x4ZciyhYsRiQIRrqb+LHryq+\nTxTgnxuMLVJEwYhrExEREYsBERGxm8hS3nzjNdwwsN/dqD73/jFR+Ih97mSynrSrmBWxGFjIjfqb\n+N+Z1vtf8usi9rmT+bjtpVjW++Qhoi5p4Qqh1A0sBkRBwopLWLNAyYMDyEREFFwtg5+//gbqGuoN\nu55RA7Cx0TH4cM1qQ65F5mlfu1t0CkTCBFUxqGuox8bpL4lO4wHzP/+L6BRIBysuVMcCRYESVMWA\niEiE2OhYSy5hHRsdq/tcFgMiom76cM2Hhl0rPz8fBQUFhl1PLxYDMsw/N9hEp0BEXcRiQIax7kJ1\nLFJEvnBqKRERsWVgNb8uahOdAlHAxcTEWnLph5gY/QOwsmMxsBhrrk3EAkXmWr1a/gFY2Vnvk4eI\n/BYV0x83Lbj0Q1RMf9EpkE4sBkRB4OPVHxl2LX6z7pk4gExERGwZEAFAVEw0blpw6YeomGjRKVAP\nwWJABODj1WsMuxa7WUhGQVcMuCgcEZH/gq4YcNVSIiL/cQCZiIiCr2VAYkTHROGfG26KTkNTdEyU\n6BSILI/FwEL6x0Th10XW+0Dtr+PDdM3qjw19TQ7CEgUWi4GFfGTgByo/TInIHxwzICIiFgMiImIx\nICIisBgQERFYDIiICCwGREQEFgMiIoLJzxnU1taipKQEoaGhyMrKQnx8vCHnEhGRsUxtGRw6dAjz\n5s3DnDlzcPDgQcPOJSIiY5laDOx2uxpHREQYdi4RERnL1GKgKIoa+/qA9+dcIiIylqnFoL293ZRz\niYjIWDbF8yu5wbZt24bc3FwAwI4dOzBt2jQAgMPhgM1mQ1pams9zO1NSUmJS1kREwS07O/uBY6YW\ngytXrmDfvn1QFAWTJk1CXFwcAOAPf/gDbDYbFi1a5PNcIiIyn6nFgIiI5MCHzoiIiMWAiIh6eDFw\nuVzSzWKSMWdPsudPFKx67LaXu3fvxoULFzB9+nQkJCQ89FyrLJXhT84VFRW4evUqwsPDMXz4cDz+\n+OMPPR4I/uQPADU1NThx4gRCQ0MxZcoUhIeHY9WqVUhNTYWiKLhx4wYWLlwYgMwf5HQ6UV1dDZfL\nhTFjxiAxMbHTc61y//iTsxXvH3/yB6x9//jzPgbs/lF6MIfDoXzzzTc+z/v888/V+G9/+5uZKfmk\nN2dPO3fu9Ou4mfTm39jYqDgcjgeO3717V413795taG7+2Ldvnxr7eh+tcv/4k7Mnq9w//uRv9fvH\nk1Xunx7dTaSXrEtl1NTU4He/+x2am5t1HbcSh8OB5uZmbN26FWfOnFGPR0ZGAgCuX7+OAQMGiEoP\nEyZM0H2uVe4ff3IGrHf/+JO/1e8fQP/7GKj7p8d2E/lDkXSpjKSkJPziF7/ArVu3dB23kosXL2
Lg\nwIGYMWMGdu7ciZSUFISEfPfdxeFw4OmnnxaY4X179uzB6NGjH3qO1e4fPTkD1r1/9OQvw/2j930M\n1P3DloEOMg94hoWF4dSpU7qPW0VYWBiysrIAAI899hgaGhq8ft/c3IxHHnlEQGbfKS0tRUpKis8H\nJK10/+jN2c1q94/e/GW4fwB972Og7h8Wgw4cDgecTqfXMc9mnBW7VrRyrq+vV+MrV674PC6SVv5D\nhw7F+fPnAQANDQ2IiopSf+dyuby+5YlQVlaGuLg4JCcnex238v3jT85WvH/8yd/q909n76PI+6fH\ndhPt3bsX58+fR69evZCUlITx48cDuD/K33HdpMzMTBQUFKhLZYjiT87l5eVoaWlBW1sbnnjiCZ/H\nrZZ/eno6CgsLcerUKcTGxiIs7Ltb9dKlSxg6dGhAc/d07do1lJWVYdiwYaiursbt27fx8ssvA7Du\n/eNvzla7f/zN38r3D9D5+yjy/uFyFERExG4iIiJiMSAiIrAYEBERWAyIiAgsBkREBBYDIiJCD37O\ngKgz+/btw+XLl5GYmIhnn31WdDpEAcFiQNTBs88+i7q6OjgcDtGpEAUMiwEFrSNHjmDXrl2YP38+\nKioqYLfbkZaWhtjYWOzZswd2ux1tbW1ISEjAmDFjvP62s2cx9+/fj+vXryM8PBwtLS2YMmUKHn30\nUVRWVmLr1q3Izs7GuHHj1J8nTpyI8ePH49q1aygtLUVERARaW1sxbNgwjBw5EgBw+PBh7NmzB8nJ\nyejTpw9aW1sxY8YM9TXPnTuHkydPIiwsDC6XC1euXMHrr78OADhz5gxOnDiByMhItLS0YPTo0Rgy\nZIhJ7ygFNdMWxyaygL/+9a8PrFv/pz/9SWltbVV/Li8vV2pqarzOuXbtmlJaWup1zOl0KgcOHFB/\nvnfvnrJlyxb15/379yuXL19Wf1dUVNRpXp999pnXzx9//LEaHz9+XDl79qyiKPfX3y8sLPQ6t7y8\nXFEURblz546yceNGr99t2bLF67+NSC+2DCio2Ww2TJ482etYbW0tdu7cqf7scrkQHh6OwYMHP/Ra\nX331FVpaWvD5558DuN96uHPnjvr7sWPHoqioCLNnz0Z5eTnGjRun/u769esoKytDaGgoQkJC0NjY\n6HXt/v37q3FkZCTu3bsH4P4iZh0XZhs7diwA4OrVq7h165ZXPs3Nzaivr9e9KimRG4sBBTVFo7sn\nNTUVU6dO9Vq8rK2tzeffjhw5Ev/973+9upQ8/y40NBR9+vTBrVu3cP36da8P5L///e+YO3euulrm\n5s2bfeYOftN9AAABMUlEQVQJAN/73vdw7Ngx/PCHP1SPff311xgwYAASExMRHx+P6dOnq79zuVya\n1yHyhQvVUdAqLi7GsWPH1L75UaNGISkpCXfu3ME//vEPhIaGQlEUNDU14amnnsKQIUNQW1uLgwcP\n4s6dO7h27RqSk5ORkpKCH/zgBwCAY8eO4dKlSwgJCUF7eztsNhtyc3PV17xz5w5+//vfY/r06V4r\nT3755Ze4efMmbDYbWltb8dVXX+G5555DRkYGSktLceDAASxZsgRxcXHYuHEjXC4XXnnlFQBAVVUV\nKisrERERgba2NsTExGDixIkA7heGw4cPq4Xt9u3beOGFF9CnT5+AvMcUPFgMiIiID50RERGLARER\ngcWAiIjAYkBERGAxICIisBgQERFYDIiICCwGREQE4P8BX8IcB5S4ceQAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1953d5748>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='relevance', y='jaccard_st_pt', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x14f953ef0>"
      ]
     },
     "execution_count": 141,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAEPCAYAAAC3NDh4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAH5BJREFUeJzt3X9QFPfdB/D3cYCAKIJICKFyIqF68We0ahxRqrZStZUS\nxlFJTKateTpJ25mn0/6ReWbyT5qZTtOZZ6apjkk701iraPhR0yoSFesvqCOKRuRA6w9EYxEQFAFB\n4O75w+e2B+ze7XG7t/vl3q+/PiybvQ+XdT/73e/uZy0ul8sFIiIiGWFGJ0BERObFIkFERIpYJIiI\nSBGLBBERKWKRICIiRSwSRESkKFzPjTc3N6OiogJWqxXZ2dlITk5WXNfhcKChoQFOpxNLlixBamqq\n39sgIiJt6TqSOHfuHAoKCrBp0yacPXvW67ptbW3Iy8tDfn4+Ll++PKptEBGRtnQtEjExMVIcGRnp\ndd3ly5cHvA0iItKWrkXC82FutQf4I0eOYOHChQFtg4iItKFrkRgcHPRr/RMnTiAjIwNJSUmj3gYR\nEWlH14nr3t5e2biurg4WiwV2u11advr0aSQlJSE9PV3VNpRUVFQEkjIRUchatWrViGW6FolFixah\nsLAQLpcLq1evlpZXVVUNKRItLS04ffo0ZsyYgYaGBnR1dWHr1q1et+HNyy+/rP0fQ0Q0htXU1Mgu\nt4y1LrAVFRUsEkREfqqpqZEdSfBhOiIiUsQiQUREilgkiIhIEYsEEREpYpEgIiJFLBJERKSIRYKI\niBSxSBARkSIWCSIiUsQiQUREilgkiIhIEYsEEREpYpEgIiJFLBJE5JPD4YDD4TA6jVETPX8jsUgQ\nkU/FxcUoLi42Oo1REz1/I7FIEJFXDocD9fX1qK+vF/JsXPT8jcYiQUReeZ6Bi3g2Lnr+RmORICIi\nRSwSRORVfn6+bCwK0fM3WrjRCRCRudntdsycOVOKRSN6/kZjkSAin0Q/Axc9fyOxSBCRT6KfgYue\nv5E4J0FERIpYJIiISBGLBBGRyRnZVoRFgoh8Yu8jY+3atQu7du0y5LNZJIjIJyMPUqHO4XCgqakJ\nTU1NhhRqFgki8srog1So8yzORhRqFgki8srog1Soa2trk42DhUWCiLwy+iAV6qZMmSIbBwuLBBF5\nZfRBSgsiT7xv3bpVNg4WFgki8srog5QWRH7pkN1uR1paGtLS0gx5cpxtOYjIK/dByh2Lxv3SIXcs\n4t9gZHFmkSAin0QdQQAjXzr03nvvGZjN6BhZ2Hi5iYjI5PjENRGZmsjX9MfCS4eM/P55uYmIvBL9\nmr7oLx0y+vvnSIKIvBp+TV9E+fn5Qo8i5OJg4UiCiLzq7u6WjUUi4gjCLDiSICKvLBaLbEzBYfSc\nCosEEXkVExMjG1NoYJEgIq+MPpPVgshtOYyek2CRIKIxT+RbeI2eE2KRICKvjD6TDZT7FtL6+noh\nRxNGzwmxSBDRmCZ6kTN6TohFgoi8GgtzEiIz+vtnkSAir9xPLM+cOVPI5w2MPsgGyujv31QP0zmd\nTrhcLlitVqNTIdKU+1q4iAdZQMyDq5vobTkAY79/XYtEc3MzKioqYLVakZ2djeTkZMV1y8vLcevW\nLWzYsAEpKSnS8oMHD2JwcBAAMH36dMyaNUvPlIl08ec//xkA8Otf/9rgTEKTyEUOMLa46Vokzp07\nh4KCAgDAgQMHkJubq7huTk6O7J0HUVFRWL16tW45EunN4XDg9u3bUizi2ax7wlfEdzEA4o4gzEDX\nOQnPmfjIyMhRbWNgYAClpaUoLS1FTU2NVqkRBY17FDE8FoXot5BSYHQtEi6XS4pHWyRycnKQl5eH\nvLw8tLS0aJUaUdC0trbKxqIQ/RZSCoyul5vccwlaCQ9Xl258fLymn0sUiIkTJ6Knp0eKRds/Pf/d\nhYeHC5f/WPDll18CAObOnRv0z9a1SP
T29srGdXV1sFgsqq4TNjY2wmazAQD6+vpUfW5HR4d/iRLp\naNy4cUNi0fbP3Nxc1NbWSrFo+Y8FO3bsAGDMjQ+6FolFixahsLAQLpdryORzVVXViCJx7Ngx3Lx5\nE9HR0bDZbMjKygIA3LlzB9XV1bBYLJg/f76e6RLpwugnZgNlt9sxdepUKabgMvrGB12LREpKCjZv\n3jxi+bZt20YsU7qDyV0siESVn5+P999/X4pFxPdIGGf4jQ/BHk2Y6mE6orHIbrcjLS1NikVj9Jms\nFsrKygAAa9euNTgT/xl94wPbchAFgcvlGnK3n0jGwt1NJSUlKCkpMTqNUUlMTJSNg4VFgkhnDocD\nTU1NaGpq4nMGBigrK0NPTw96enqkEYVI3njjDdk4WFgkiHQm+pm46A3yPEcQIo4m3DcOTJ06lQ3+\niMh8xkKDPNEZMYJw40iCSGein4kDz/IWNfdXX31VNhZJY2MjGhsbDflsjiSIdDYWzsRFzRt4dkfT\nX/7yFykW0WeffQbAmPw5kiAKApHPxEXncDiku8tEvHGgrKwMfX196OvrM2TifdRF4osvvtAyD6Ix\nzW63C302LjLRu/C6RxHD42Dxebnpgw8+wMyZM9HZ2YmrV69i9uzZ+PLLLzFt2rRg5EdEFBCjH0YL\nVH9/v2wcLD5HEq+99hry8vIQHR2NX/3qV9iyZQs++OADTJw4MRj5EREFJDY2VjYWheex1ojjrs8i\n4W4nEBERIb17Ojw8HOPHj9c3MyIyDYfDIeT1fGBo3ykRe1AN7yIcbKrnJDxbfQPA48ePNU+GiMyp\nuLhYyAcBgaHHKhGPW0bnr/oW2Hnz5mHfvn3IzMzEtWvX2LabKES4X1/qjkWbgJ8wYYL00qcJEyYY\nnI3/YmNjpfyNuFymeiRht9uRm5uLiRMnYsOGDXjppZf0zIuITEL0tiKejRVFbLIYHR0tGweL37fA\nhoWFCXldj4hGp7u7WzYWxaNHj2RjURhd5FQXiStXruBvf/sbOjs7cfDgQTQ0NOiZF9EIIk+eikz0\niV/RGf39qy4StbW12LhxI+bNm4f8/HxcvnxZz7yIRhB58lRkor9+9bnnnpONRWH096+6SAy/FhYe\nzrZPFDzuydP6+nqOJoIsNTVVNhaF0e9jCNTChQtl42BRXSQeP34Mp9MJAHA6ndItsYcPH9YnMyIP\nok+eiuz48eOysSg8+x2J+NIhzxZIRrRDUj0c6OjowLvvvovZs2ejtrYW06dPR1FREa5evYrvfOc7\neuZoGu4zWNFuASQKxODgoGwsigsXLsjGomhra5ONg0V1kcjMzMTPfvazEcvLy8s1TcjM3Gew7733\nnsGZhJ78/Hy8//77UkzBY7FYpLtqOHEdfBEREejr65PiYFN9uSknJ0dx+aVLlzRLyKx4TdxY7ncy\nzJw5kyO5IHvhhRdkY1EYPfEbqClTpsjGwaLJ+yRu376txWZMjdfEjcd3Mhjj3r17srEo3E8rD49F\ncffuXdk4WDS5RUnEpxhJPCKPIDifRaLSZCQRCtcpx8J7isk4Ij/j4dmnTcSebZmZmbKxKBYsWCAb\nBwtfX6oSr4nTaIk+nyX65ZrFixfLxqL4xS9+IRsHC4uEH3hNnEZD9Pms9vZ22VgURr/+UwupqamG\nPcjIOQk/cARBoejhw4eysSiePn0qG4vEyOKsyUhi3rx5WmyGaEwSva2F6DxbCInYTqisrAw9PT3o\n6ekx5InxUReJ2tpaKbbZbFrkQjQmVVZWysaimDRpkmwsiueff142FkVJSYlsHCyqi8SpU6eG/Hzr\n1i3NkzE70VtVi56/qERvayG6r3/967KxKPr7+2XjYFFdJEScsNKayLcxAuLnLyrRn/gVfeJa9JGc\n0VQXieHPQrh7iYQK0W9jFD1/kXn+WxHx343oIyGjz8QD5dmvyZS9mw4ePIiioiLU1taiuLgYRUVF\nKC
4uxty5c4ORn2mIfhuj6PmLbMKECbKxKIzuHRQo9ysOhseiePXVV2XjYPE51b9+/XoAz+Ykli9f\nrntCpI8HDx7IxqS/cePGycaimDNnDo4ePSrFojH6HdGiU325KdQLhOhtOUR/GbzIxo8fLxuLQvRr\n+qKPhIS5uynUid6Ww2q1ysakP9FPMET31ltvycakjuoi8emnn6KpqQnV1dXYu3cvzpw5o2depiRy\nWw6jr2uSuES/O2v//v2ysSgSEhJk42BRXSQmT56MqVOn4v79+9iyZQtaWlr0zMuU7Ha7kKMIAFi7\ndi1iYmIQExODtWvXGp3OqJSVlQn5juJdu3bJxqIw+vWZgbp27ZpsLAph3ifhnvBxP9YeFsYrVaIR\nfQThvh4rWpH76quvZGMiEaguEo8fP8adO3cwefJkACwSIhLt4OrJ3b/GHYv0t4j+nAGFNtVH+mXL\nlqG2thbf+MY3cOnSJSFfY0jiMvoOD6JQpXokkZaWhrS0NADPur6y8ysFE8/GiYyhyTWjY8eOabEZ\n0pnIDf5EvsPG89IsL9OSaDTZY7u6urTYjOmJfJAFxG7wJ3L/I9HbQoj+xHhUVJRsTOpoUiSGN/8b\nq0Q+yIre4C8xMVE2Jv2J/sTyxIkTZWNSx1RjX6fTadrrzaIfZEVv8LdixQrZmPRn9H36gfJ8pisU\nn+8KlK5Form5GXv27MG+ffvQ3Nzsdd3y8nJ8/PHHuH///qi3oSfRD7KiO3/+vGxMRPrStUicO3cO\nBQUF2LRpE86ePet13ZycHNkzRH+2QcrYP4iIRkPXIuF5F0pkZKRh29CC6AdZz3YWIra2EPn7591N\nJDLVz0kMd/v2bem5iVWrVsmu49m7fbQHeC22oQV3F1h3LJoLFy7IxqKw2+1ISkqSYpGIfncThTaf\nRaK0tFSK3Tu4y+XC9evX8e677wIAYmNjZf9bLSahR7ON+Pj4gD9Xzptvvqnr9oNJxL/B3VxOxNw9\nMX9jMX//+CwS48ePx5o1a1BZWYmIiAjMmzcPFy9eVJVob2+vbFxXVweLxaLqjFBpG950dHSoWs9f\nU6dO1XX7esrMzJQ6YGZmZgr3N5SVlUknKXv27BGqd9Nwon33wzF/YwU7f59FYs2aNQCe3WXk7iK6\nePFiHDhwwOfGFy1ahMLCQrhcLqxevVpaXlVVNaJIHDt2DDdv3kR0dDRsNhuysrK8boP8I/pLh/bu\n3TskFrlIEIlE9ZzE8Afm1FxbTUlJwebNm0cs37Zt24hlSgVAaRsUWti7icgYqm+1GBwclK4Jt7W1\n4enTp7olRdpbuHChbExE5I3qkcT3v/99lJeXo6urC7GxscLdhhjqTp48OSQW7XJNVFSUNCfF/jtE\nwaO6SISHh2P9+vV65kI6Ev0VlKO5gYGIAqf6clNlZaWeeZDORG/SRkTGUF0khp99sn+OWNhFlYhG\nQ3WRePLkyZDJar7QXSwXL16UjUURHh4uGxORvlT/a7PZbPjkk0+wcuVKABjRrZVIT5GRkRgYGJBi\nIgoO1SOJy5cvSwWCxOPZX0up15aZJSQkyMZEpC/VI4lvf/vbsNls0s+h8spST+6XDYnWYA4AfvCD\nH+Do0aNSLBrRX3xDJCrVIwnPAgE8a5cRakR/falcTETkzaib29++fVvLPExP9NeXfvzxx7IxEZE3\nqovEgwcPcODAAZSWlqK0tBR79uzRMy/TEf31paI/TEdExlBdJA4fPox169YhIiIC69evx4wZM/TM\nizQWEREhGxMReaO6SMTGxiIiIgIulwuRkZEh9xpGkV+fCUB6i+DwmIjIG9V3N7nvUXc/UDe8dfhY\nJ/rrS69fvy4bExF5o7pIuC8vzZo1C/v379ctITMTcQTh5vmucM+YiMgb1UVi1qxZAJ4VC5vNxnbN\ngpk0aZL02sNJkyYZnA0RiSK0JhYCJPJzEsnJybIxEZE3qovEjh07
UFVVBQBobW3F8ePHdUvKjER/\nTkL0iXciMobqIpGSkoKlS5cCAL72ta+hvb1dt6TMSPTnJDwn20WceCciY6guEsNveQ21ds3Nzc2y\nsSh++9vfysZERN6oLhLuW2DdPN8tEQoePnwoG4viwoULsjERkTeqhwNz5sxBSUkJ0tPTcePGDcye\nPVvPvEyHt5ASUShSXSQyMjKQmpqKe/fuYd26dYiOjtYzL9MJCwuD0+mUYiKiUODXxEJUVBTS09P1\nysXUOJIgolCk+pS4uLgY//znPwEA165dQ21trW5JmZHoRSIxMVE2JiLyxq/rJq+88goAIDMzE1ev\nXtUlIbMSvYvqkydPZGMiIm9UF4nhbTisVqvmyZhZfHy8bCyK7u5u2ZiIyJtR3wIr4iWXQHgWSfat\nIqJQobpIJCUlobq6Gk+fPsX58+eFPJsOhGdr9FBrk05EoUt1kVi6dCni4uJw5MgRjB8/Ht/85jf1\nzMt0Ojs7ZWMiorHMr1tgMzMzkZmZqVcupuZusz08JiIay0b9VBhbOxARjX2qRxK1tbWor6/Hw4cP\nMXHiRDQ2NmLBggV65kYaslgs0s0GnFOhseSXv/wl7t69q3r9zZs3e/19amoqPvzww0DTUk3L/PXI\nXXWRaGhowMaNG/H3v/8d3/3ud1FUVKRpImYXGxuLrq4uKRYNiwR548+BymwHWTWf5c65sLBQ73T8\nZvb8VRcJ9wNk7v5FIj5QFoienh7ZWBTu/2/DY9KGyAdZwPeByswHWdKX6iLhbg0+ODgIp9MZcs9J\n8CBL3ng7yA4vCjzQBl9hYaHP4mxmRuaveuI6OzsbAJCVlYW9e/ciLi5Or5yIxhTPoiBqgRA1bwqc\n6pFEUlISAGDKlCl47bXXdEuIiIjMgy9GIAoCnomTqHyOJE6ePIkFCxbgj3/8I6ZOnTrkdwMDA8jI\nyMDLL7+sW4JERGQcn0XCPUFts9mQm5s74vfFxcUsEhQws98rThSqfBYJ94S10r314eF+dfYgkuXr\noF5QUDDk9bF79uwJRlpEIU/1nMSkSZP8Wj7WiP7SIdF5FgUWCKLgUT0MWLFihexy90hDdP5c7ujv\n7+flDgOEhYXxGRWiIOO1ov+n5qC+detW9Pf3m/JOlVC4pr9nzx6hH4giEhGLhB82bdqE3bt3G52G\nLLP3fyEiMfE5CT+sXbvW6BQCkpiYaHQKRCQYXUcSzc3NqKiogNVqRXZ2NpKTk/1e9+DBgxgcHAQA\nTJ8+HbNmzdIz5THto48+4uUaIvKLrkXi3LlzKCgoAAAcOHBA9jkLX+tGRUVh9erVeqZJREQKdC0S\nMTExUhwZGTmqdQcGBlBaWgrg2QN9fHCPiCh4dC0Snu3EfRUJpXVzcnKkuLy8XMPsiIjIF12LhHsu\nQat11T7dHR8fr/pzR0Pv7euN+RtH5NwB5m80I/LXtUj09vbKxnV1dbBYLLDb7T7XbWxshM1mAwD0\n9fWp+tyOjo7RpmyK7euN+RtH5NwB5m80I/LXtUgsWrQIhYWFcLlcQyafq6qqRhQJpXXv3LmD6upq\nWCwWzJ8/X890iYhoGF2LREpKiuwtl9u2bVO9blZWli65ERGRb3yYjoiIFLFIEBGRIhYJIiJSxCJB\nRESKWCSIiEgRiwQRESlikSAiIkUsEkREpIhFgoiIFLFIEBGRIhYJIiJSxCJBRESKdG3wR0QUyn76\n9k/R1tGm2fa0ekd9YnwiPtrxkap1WSSIyLTeeeenaG8330E2ISER27f7Psi2dbTh01Ufa/KZWnqz\n4r9Ur8siQTSG/fidn+BR+wPNtqfVQTYuYTJ2bv+9z/Xa29uw+a3dmnymlgo/ed3oFIKGRYLIix+/\n8zYetWv3NjDtDrLx2Ll9h8/1HrU/wLgf/bcmn6mlR3/8X6NTIJVYJIi8eNTeAetbOUanMcKjT8qN\nToFCBO9uIiIiRSwSRESkiJeb
SFdvv/NjdLQ/0nSbWl3Xj0+Iw47tOzXZFtFYxSJBuupof4Rvvuky\nOg1Z//hU2+JFNBaxSAjgJ+/8GA80PBvX6kx8ckIcfs8zcaIxjUVCAA/aH+F/XjXf/6oPSngmTjTW\nceKaiIgUsUgQEZEiFgkiIlJkvgvdOvjp2++graNds+1p14kxAR/t2K7JtojInPxppmdGIVEk2jra\nsXvDa0anMcLrn//F6BSITE/0ZnrsAktEptYneDM9doE1FosEkQ+DgjfTM2MXWNELVyhhkSDywYxd\nYEUvXCQO3t1ERESKOJIg3f3jU4vRKRDRKLFICOKDkgGjUxg18zb4Y/Ei8oVFQhDm7N0kbuEiInU4\nJ0FERIrMd3pKRDRGJMYnmvKJ68T4RNXrhkyR4NPNFIriEibjkQmfSYhLmKxqvYSERFM+uJaQoO4g\n+9GOjzT7zM2bN6OwsFCz7akVMkWCbTloNOIS4vHIhM8kxCXEq1pv5/bfa/aZRhyktm8X/yArupAp\nEkSjsXP7Ds22xYMUiYgT10REpIgjCdJVfEIc/vGpOV9zGp8QZ3QKRKbHIiGAyQlxpnyf9GQVB9kd\n23dq+pm8ZEMUXCwSAvi9hgdaHmSJyB+ckyAiIkUsEkREpIhFgoiIFOk6J9Hc3IyKigpYrVZkZ2cj\nOTnZ73X92QYREWlL15HEuXPnUFBQgE2bNuHs2bOjWtefbRARkbZ0LRIxMTFSHBkZOap1/dkGERFp\nS9ci4XL952Uzvg7wSuv6sw0iItKWrkVicHAw4HX92QYREWnL4vI8VdfYgQMHkJubCwA4ePAg1q9f\nDwCoq6uDxWKB3W73ua7SciUVFRWa/x1ERKFg1apVI5bpWiTu3buHkydPwuVyYfXq1UhKSgIA/OEP\nf4DFYsGPfvQjn+sqLSciIv3pWiSIiEhsfJiOiIgUsUgQEZEiFgkZTqdTuLuqRMzZk+j5E41VbBU+\nTHl5OW7duoUNGzYgJSXF67pmaRniT85VVVW4f/8+IiIiMHPmTEyfPt3r8mDwJ38AaGxsxKVLl2C1\nWpGTk4OIiAh8+OGHyMzMhMvlwoMHD/DDH/4wCJmP5HA40NDQAKfTiSVLliA1NVVxXbPsP/7kbMb9\nx5/8AXPvP/58j0Hbf1w0Ql1dneurr77yud7nn38uxX/961/1TMkntTl7OnTokF/L9aQ2/4cPH7rq\n6upGLH/y5IkUl5eXa5qbP06ePCnFvr5Hs+w//uTsySz7jz/5m33/8WSW/YeXmwIgasuQxsZG/OY3\nv0Fvb6+q5WZSV1eH3t5elJaW4tq1a9LyqKgoAEBrayumTJliVHpYvny56nXNsv/4kzNgvv3Hn/zN\nvv8A6r/HYO0/vNwUAJegLUNsNht+/vOfo7OzU9VyM7l9+zaee+455OXl4dChQ8jIyEBY2H/Oderq\n6vDKK68YmOEzR44cwcKFC72uY7b9R03OgHn3HzX5i7D/qP0eg7X/cCQRAJEnWsPDw3HlyhXVy80i\nPDwc2dnZAIAXXngBHR0dQ37f29uLcePGGZDZf5w4cQIZGRk+H/w00/6jNmc3s+0/avMXYf8B1H2P\nwdp/WCRUqqurg8PhGLLMczhoxks0cjm3t7dL8b1793wuN5Jc/i+++CJu3rwJAOjo6EBcXJz0O6fT\nOeSs0AinT59GUlIS0tPThyw38/7jT85m3H/8yd/s+4/S92jk/sPLTcMcO3YMN2/eRHR0NGw2G7Ky\nsgA8u+tgeL+pRYsWobCwUGoZYhR/cq6srERfXx8GBgYwZ84cn8vNlv+8efNQXFyMK1euIDExEeHh\n/9mFm5qa8OKLLwY1d08tLS04ffo0ZsyYgYaGBnR1dWHr1q0AzLv/+Juz2fYff/M38/4DKH+PRu4/\nbMtBRESKeLmJiIgUsUgQEZEiFgkiIlLEIkFERIpYJIiISBGLBBERKeJzEkQqnTx5Enfv3kVqai
pW\nrFhhdDpEQcEiQaTSihUr0NbWhrq6OqNTIQoaFgkKOefPn8fhw4fx+uuvo6qqCjExMbDb7UhMTMSR\nI0cQExODgYEBpKSkYMmSJUP+W6VnT0+dOoXW1lZERESgr68POTk5mDBhAmpra1FaWopVq1Zh2bJl\n0s8rV65EVlYWWlpacOLECURGRqK/vx8zZszA7NmzAQDV1dU4cuQI0tPTERsbi/7+fuTl5Umfef36\ndVy+fBnh4eFwOp24d+8e3n77bQDAtWvXcOnSJURFRaGvrw8LFy7EtGnTdPpGaUzTrQk5kYl99tln\nI94b8Kc//cnV398v/VxZWelqbGwcsk5LS4vrxIkTQ5Y5HA7XmTNnpJ+fPn3qKioqkn4+deqU6+7d\nu9LvSkpKFPPav3//kJ937twpxRcvXnT961//crlcz95/UFxcPGTdyspKl8vlcnV3d7t279495HdF\nRUVD/jYitTiSoJBksViwZs2aIcuam5tx6NAh6Wen04mIiAikpaV53daNGzfQ19eHzz//HMCz0UZ3\nd7f0+6VLl6KkpAQbN25EZWUlli1bJv2utbUVp0+fhtVqRVhYGB4+fDhk25MnT5biqKgoPH36FMCz\n5m/DG9otXboUAHD//n10dnYOyae3txft7e2qu7wSubFIUEhyyVw2yszMxLp164Y0fRsYGPD5386e\nPRv//ve/h1ya8vzvrFYrYmNj0dnZidbW1iEH6i+++AJbtmyRuo/u27fPZ54A8Pzzz6Ompgbz58+X\nlt25cwdTpkxBamoqkpOTsWHDBul3TqdTdjtEvrDBH4WcsrIy1NTUSNf+586dC5vNhu7ubhw9ehRW\nqxUulws9PT1YvHgxpk2bhubmZpw9exbd3d1oaWlBeno6MjIy8NJLLwEAampq0NTUhLCwMAwODsJi\nsSA3N1f6zO7ubvzud7/Dhg0bhnTyPH78OB49egSLxYL+/n7cuHED3/rWt7BgwQKcOHECZ86cwVtv\nvYWkpCTs3r0bTqcTb7zxBgCgvr4etbW1iIyMxMDAABISErBy5UoAzwpGdXW1VPC6urrwve99D7Gx\nsUH5jmnsYJEgIiJFfJiOiIgUsUgQEZEiFgkiIlLEIkFERIpYJIiISBGLBBERKWKRICIiRSwSRESk\n6P8AvWlxj8FLwUsAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x14f953358>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='relevance', y='jaccard_st_pd', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x191a0dac8>"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEPCAYAAACgFqixAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFU9JREFUeJzt3d1TVPcBxvFnXXaDhEaJSsmG6oYgQykkNFpiGFSqtNrq\nROo4GQhtetGQi172plP/gc40d73odNrOtDNOS6xIyYuGUMmgvNSiNSS4SDQq5oUuYIRQ0EXYPb1w\n2PC+Z82ePbv4/Vyd/e1vj0+YMzw55+z54TAMwxAA4IG2yu4AAAD7UQYAAMoAAEAZAABEGQAAFIcy\nCIVCCgaDVv8zAICvIMXKnTc1Nen69es6cOCAPB7PsnP9fr9aWlrkdDpVXl6urKwsK6MBAGZxWP2c\nQW9vr9auXRuxDN544w09//zzkqTGxkZVVlZaGQsAMEvC3DNIS0sLb7vdbhuTAMCDJ2HKYPYJCmUA\nAPGVMGXATWYAsI+lN5CX4vP55HA4VFBQEB4LBAKLbi+lpaXFkmwAsNLt3r17wZilZXDq1Cldu3ZN\nq1evltfr1fbt2yVJnZ2dC8qgpKREdXV1MgxDFRUVpvb/zDPPWJJbkqqrq1VXV2fZ/q1Gfvskc3aJ\n/HazOv+FCxcWHbe0DJb6pV5bW7tgzOPxqLq62so4AIAlJMw9AwCAfSgDAABlAACgDAAAogwAAKIM\nAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwA\nAKIMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAA\nogwAAKIMAACiDAAAklKs3Lnf71dLS4ucTqfKy8uVlZW15Nz+/n6dP39eoVBI27Zt08aNG62MBgCY\nxdIy6OrqUk1NjSSpsbFRlZWVS869dOmSDh06JEk6ceIEZQAAcWRpGaSlpYW33W73snODwaBCoZAk\nKTU11cpYAIB5LC0DwzDC25HKoLCwUK+++qocDoeqqqqsjAUAmMfSMggGg6bnvv/++/rlL38pwzB0\n/PhxU5eJMjIyvko82/dvNfLbJ5mzS+S3mx35LS2DQCCw6LbP55PD4VBBQUF47KGHHpIkORwOPfLI\nI6b2PzIyEqOk9uzfauS3TzJnl8hvNzvyW1oGJSUlqqurk2EYqqioCI93dnYuKAOv16uGhgZJUlFR\nkZWxAADzWFoGHo9H1dXVC8Zra2sXjOXn5ys/P9/KOACAJfDQGQCAMgAAUAYAAFEGAABRBgAAUQYA\nAFEGAABRBgAAUQYAAFEGAABRBgAAUQYAAFEGAABRBgAAUQYAAFEGAABRBgAAUQYAAFEGAABFUQaX\nL1+2MgcAwEamy6C3t3fO6/7+/lhnAQDYxHQZ3L59e87r999/P+ZhAAD2MF0GpaWleuedd6zMAgCw\niekyOHLkiMbGxlRfX69jx46pp6fHylwAgDhKMTvxBz/4gbZu3Rp+ffLkSUsCAQDiz/SZwewikKQf\n/vCHMQ8DALBHVM8ZBAIB9ff3a3Jy0qo8AAAbmC6Dvr4+vfnmmxodHdUbb7zBcwcAsIKYvmfQ3d2t\nqqoqSVJxcbGOHj2qvLw8y4IBAOLH9JmB2+2e8zo1NTXmYQAA9jBdBtPT0wqFQpKkUCikiYkJy0IB\nAOIr4mWi+vp6SdLY2JgOHz6swsJC9fT0aNOmTZaHAwDER8QySE9P1969exeMNzU1WRIIABB/ES8T\nLVYE88e7u7tjlwgAEHcx+XsGN27ciMVuAAA2iUkZGIYRi90AAGxi+jmD5TgcjkXH/X6/Wlpa5HQ6\nVV5erqysrGX309/fr+7ubjmdTu3du1culysW8QAAEcSkDJbS1dWlmpoaSVJjY6MqKyuXnPvFF1/o\n9u3by84BAFjD0r+BnJaWFt6e/9DafD
6fT4FAQA0NDSx1AQBxZuk9g9njkcrgxo0bGh0d1cGDB3Xl\nypXwA24AAOvFpAyKi4sXHQ8Gg6b3kZKSovLycknS448/rpGRkVhEAwCYcN/3DHp6elRUVCRJ8nq9\ni84JBAKLbvt8PjkcDhUUFITHNm/erGvXrik3N1cjIyMqLCyMmCEjI+M+05tj9f6tRn77JHN2ifx2\nsyO/6TI4c+aMduzYEX59/fr1cBkspaSkRHV1dTIMQxUVFeHxzs7OBWVQXFys+vp6Xbx4UevXr1dK\nSuRoVp89JPvZCfntk8zZJfLbzY78psvg1q1bUe/c4/Gourp6wXhtbe2i8w8dOhT1vwEA+OpM3zOY\n/ywBf+0MAFaOiGcGb731lu7cuaMPP/xQU1NTMgxDDodDTz/9dDzyAQDiIGIZ7N+/X9LCewYAgJXD\n9GUiigAAVi5Ln0AGACQH098m+stf/qJdu3ZpcHBQV65c0caNG1VWVmZlNgBAnJg+M1i3bp02btyo\nwcFBvfjiixoaGrIyFwAgjkyXwcw6QzMPg61axRUmAFgpTP9G/9///qdPPvlE69atu/dBygAAVgzT\nv9HLysrU09Oj73znO+ru7tbAwICVuQAAcWT6BvKmTZu0adMmSffWEVpqpVIAQPKJybWeU6dOxWI3\nAACbxKQMxsfHY7EbAIBNYlIG8xexAwAkF74SBACgDAAAlAEAQJQBAEBfoQxu3LgR3t69e3dMwgAA\n7BHxobOGhobwdigUknRvnaKPPvpIv/rVryRJ6enpFsUDAMRDxDJ4+OGHtWfPHnV0dMjlcqm4uFjv\nvfeeMjIy4pEPABAHES8T7dmzR5Lk9/tVUlIit9utZ599lgfNAGAFMX3PYP6DZTOXjAAAyc90GQSD\nQd28eVOSdPPmTd29e9eyUACA+DK9aumPfvQjNTU1aXx8XOnp6Tp06JCVuQAAcWS6DFJSUrR//34r\nswAAbGL6MlFHR4eVOQAANjJdBjP3C2acP38+5mEAAPYwXQZ37tyZc9P4s88+syQQACD+TN8z8Hq9\n+sMf/qBdu3ZJkgYHBy0LBQCIL9NnBh988EG4CAAAK4vpM4Pvf//78nq94dc8gQwAK4fpM4PZRSBJ\nJSUlsc4CALBJTJawBgAkN9OXiT7//HO1tbWF1yTq6+vT4cOHLQsGAIgf02cGb7/9tvbt2yeXy6X9\n+/crPz/fylwAgDgyXQbp6elyuVwyDENut1urVvEXMwFgpTD9G316elqSwg+ezV/SejF+v19//etf\n9dprr8nv90ecHwgE9Otf/1oDAwNmYwEAYsB0GcxcFiosLNTRo0cVCAQifqarq0s1NTWqqqrS2bNn\nI85vbW0N/zEdAED8mL6BXFhYKOleKXi9XqWmpkb8TFpaWnjb7XYvO3dkZERpaWmm9gsAiC1LL/wb\nhhHejlQG7e3tKisrszIOAGAJpsvgd7/7nTo7OyVJw8PDevfddyN+JhgMmg4yODioN998U+3t7bp4\n8aLpzwEAvjrTl4k8Ho9KS0slSd/4xjf073//O+JnZt9XmL3t8/nkcDhUUFAQHnv55ZclSb29vVq7\ndq2pTBkZGabm3S+r92818tsnmbNL5LebHflNl8H8r5KmpET+aElJierq6mQYhioqKsLjnZ2dC8pA\nkm7duqX29nZ5vV55PJ6I+x8ZGTGZ/v5YvX+rkd8+yZxdIr/d7Mhvugxmvlo6Y/bfNliKx+NRdXX1\ngvHa2tpF5z/66KN65ZVXzEYCAMSI6TJ46qmndPz4ceXk5Ojq1asqKiqyMhcAII5Ml0Fubq6ys7M1\nMDCgffv2afXq1VbmAgDEkekykKTU1FTl5ORYlQUAYBPTXy2tr6/Xv/71L0nS5cuX1dPTY1koAEB8\nRfXQ2XPPPSdJysvL04cffmhJIABA/Jkug/nLRDidzpiHAQDYI+pVS2fMXmoCAJDcTJdBZmamzp07\np7
t37+r8+fNJ/4QfAOBLpsugtLRUa9asUXNzsx5++GF997vftTIXACCOovpqaV5envLy8qzKAgCw\nyX0vYf2f//wnljkAADYyfWbQ09OjS5cuaXR0VI888oj6+/u1ZcsWK7MBAOLE9JlBX1+fXnjhBT32\n2GOqqqrSk08+aWUuAEAcmS4Dl8slSQqFQnNeAwCSn+kymFmyOhgMKhQK8ZwBAKwgpsugvLxckrR9\n+3b97W9/05o1a6zKBACIM9M3kDMzMyVJGzZs0I9//GPLAgEA4u++v1oKAFg5Ip4ZnD59Wlu2bNGf\n/vQnbdy4cc5709PTys3N1TPPPGNZQACA9SKWwcyNYq/Xq8rKygXv19fXUwYAkOQilsHMjWOHw7H4\nDlKiWtECAJCATN8zWLt2bVTjAIDkYboMdu7cuej4zJkDACB58W0iAABlAACgDAAAogwAAKIMAACi\nDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAogwAAKIMAAAy8cdtvgq/36+WlhY5nU6Vl5crKytr\nybm9vb3q6+tTKBTStm3blJ2dbWU0AMAslpZBV1eXampqJEmNjY2L/tnMGTdv3tTBgwclSSdPnqQM\nACCOLL1MlJaWFt52u93Lzt2xY4eVUQAAy7C0DAzDCG9HKoMZzc3N2rp1q1WRAACLsPQyUTAYjGp+\na2urcnNzlZmZaWp+RkbG/cQyzer9W4389knm7BL57WZHfkvLIBAILLrt8/nkcDhUUFAQHmtra1Nm\nZqZycnJM739kZCQ2QW3av9XIb59kzi6R32525Le0DEpKSlRXVyfDMFRRUREe7+zsnFMGQ0NDamtr\nU35+vvr6+jQ+Pq6XXnrJymgAgFksLQOPx6Pq6uoF47W1tXNeZ2Zm6vDhw1ZGAQAsg4fOAACUAQCA\nMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgy\nAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIAAIgyAACIMgAAiDIA\nAIgyAACIMgAAiDIAAIgyAACIMgAASEqxcud+v18tLS1yOp0qLy9XVlZWTOYCAGLL0jODrq4u1dTU\nqKqqSmfPno3ZXABAbFlaBmlpaeFtt9sds7kAgNiytAwMwwhvR/oFH81cAEBsWVoGwWDQkrkAgNhy\nGLP/lzzGGhsbVVlZKUl66623tH//fkmSz+eTw+FQQUFBxLlLaWlpsSg1AKxsu3fvXjBmaRkMDAzo\n9OnTMgxDFRUVyszMlCT98Y9/lMPh0MsvvxxxLgDAepaWAQAgOfDQGQCAMgAAPOBlEAqFku5bTMmY\nebZkzw+sVJYuR5HImpqadP36dR04cEAej2fZuYmyVEY0mTs7OzU4OCiXy6VvfvObevLJJ5cdj4do\n8ktSf3+/uru75XQ6tXfvXrlcLr366qvKy8uTYRj6/PPP9bOf/SwOyRfq7e1VX1+fQqGQtm3bpuzs\n7CXnJsrxE03mRDx+oskvJfbxE83PMW7Hj/EA8/l8xmeffRZx3uuvvx7e/sc//mFlpIjMZp7txIkT\nUY1byWz+0dFRw+fzLRi/c+dOeLupqSmm2aJx+vTp8Hakn2OiHD/RZJ4tUY6faPIn+vEzW6IcPw/0\nZSKzknWpjP7+fv3mN79RIBAwNZ5IfD6fAoGAGhoadPny5fB4amqqJGl4eFgbNmywK5527Nhhem6i\nHD/RZJYS7/iJJn+iHz+S+Z9jvI6fB/YyUTSMJF0qw+v16he/+IXGxsZMjSeSGzdu6Otf/7oOHjyo\nEydOKDc3V6tWffn/Lj6fT88995yNCe9pbm7W1q1bl52TaMePmcxS4h4/ZvInw/Fj9ucYr+OHMwMT\nkvmGZ0pKii5evGh6PFGkpKSovLxckvT4449rZGRkzvuBQEAPPfSQ
Dcm+1Nraqtzc3IgPSCbS8WM2\n84xEO37M5k+G40cy93OM1/FDGczj8/nU29s7Z2z2aVwiXlpZLPOtW7fC2wMDAxHH7bRY/s2bN+va\ntWuSpJGREa1Zsyb8XigUmvN/eXZoa2tTZmamcnJy5own8vETTeZEPH6iyZ/ox89SP0c7j58H9jLR\nqVOndO3aNa1evVper1fbt2+XdO8u//x1k0pKSlRXVxdeKsMu0WTu6OjQ5OSkpqen9dRTT0UcT7T8\nxcXFqq+v18WLF7V+/XqlpHx5qH788cfavHlzXLPPNjQ0pLa2NuXn56uvr0/j4+N66aWXJCXu8RNt\n5kQ7fqLNn8jHj7T0z9HO44flKAAAXCYCAFAGAABRBgAAUQYAAFEGAABRBgAAPcDPGQBLOX36tD79\n9FNlZ2dr586ddscB4oIyAObZuXOnbt68KZ/PZ3cUIG4oA6xY58+f19tvv62f/OQn6uzsVFpamgoK\nCrR+/Xo1NzcrLS1N09PT8ng82rZt25zPLvUs5pkzZzQ8PCyXy6XJyUnt3btXX/va19TT06OGhgbt\n3r1bZWVl4de7du3S9u3bNTQ0pNbWVrndbk1NTSk/P19FRUWSpHPnzqm5uVk5OTlKT0/X1NSUDh48\nGP43P/roI33wwQdKSUlRKBTSwMCAfv7zn0uSLl++rO7ubqWmpmpyclJbt27VE088YdFPFCuaZYtj\nAwng73//+4J16//85z8bU1NT4dcdHR1Gf3//nDlDQ0NGa2vrnLHe3l6jvb09/Pru3bvGsWPHwq/P\nnDljfPrpp+H3jh8/vmSuo0ePznn9+9//Prz93nvvGVeuXDEM4976+/X19XPmdnR0GIZhGBMTE8aR\nI0fmvHfs2LE5/22AWZwZYEVzOBzas2fPnDG/368TJ06EX4dCIblcLm3atGnZfV29elWTk5N6/fXX\nJd07e5iYmAi/X1paquPHj+uFF15QR0eHysrKwu8NDw+rra1NTqdTq1at0ujo6Jx9r1u3Lrydmpqq\nu3fvSrq3iNn8hdlKS0slSYODgxobG5uTJxAI6NatW6ZXJQVmUAZY0YxFLvfk5eVp3759cxYvm56e\njvjZoqIi/fe//51zSWn255xOp9LT0zU2Nqbh4eE5v5Dfeecdvfjii+HVMl977bWIOSXpscce04UL\nF/Ttb387PPbJJ59ow4YNys7OVlZWlg4cOBB+LxQKLbofIBIWqsOKdfLkSV24cCF8bf7pp5+W1+vV\nxMSE/vnPf8rpdMowDN2+fVvPPvusnnjiCfn9fp09e1YTExMaGhpSTk6OcnNz9a1vfUuSdOHCBX38\n8cdatWqVgsGgHA6HKisrw//mxMSEfvvb3+rAgQNzVp5899139cUXX8jhcGhqakpXr17V9773PW3Z\nskWtra1qb2/XK6+8oszMTB05ckShUEg//elPJUmXLl1ST0+P3G63pqen9eijj2rXrl2S7hXDuXPn\nwsU2Pj6u559/Xunp6XH5GWPloAwAADx0BgCgDAAAogwAAKIMAACiDAAAogwAAKIMAACiDAAAkv4P\nAJ0+A6ftmBMAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x128f23f98>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='relevance', y='jaccard_st_br', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x160b02c50>"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAEPCAYAAAC3NDh4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X1QXOXdN/DvsgtsVjSBEESKyQrIJBTzXqLRRO6QNgyJ\nBmmaSUTNPWr6WL3943HsdOozo+N07pm2dupMe+tY21rbqBgFGseINGa9icCWQsRMyG5ImhcSYroQ\nZA2GZJFlz/MH3eMu7GHPLnv27AXfz4wzF4ezZ385XpzfOed6M0iSJIGIiCiEJL0DICKixMUkQURE\nipgkiIhIEZMEEREpYpIgIiJFTBJERKTIpOXBXS4XbDYbjEYjSktLkZ2dPeX+Ho8HL774Inbt2oWc\nnBwAwP79+zE2NgYAyM/PR3FxsZYhExFRAE2TRHt7O6qrqwEA+/btQ2Vl5ZT7NzU1YdOmTUHbzGYz\nNm7cqFmMRESkTNMkYbFY5HJKSsqU+7rdblgsFpjN5qDtXq8X9fX1AACr1YqVK1fGPlAiIgpJ0yQR\nOJg7XJJoaWnB5s2b0d3dHbS9vLxcLjc2NsY2QCIimpKmScLflqBGX18f3n//ffT19cFqtcptEoFM\nJk3DJSKiCTS96no8npBlh8MBg8GAoqIiedujjz4KAHA6nZg3b568vaenB1arFQAwMjIS9jttNtt0\nwyYimpXKysombdM0SZSUlKCmpgaSJAU1Ptvt9klJAgAGBwfR0tIS9CTR29uLjo4OGAwGrFixQtX3\nst2CiCgynZ2dIbcbZtossDabjUmCiChCnZ2dIZ8kOJiOiIgUMUkQEZEiJgkiIlLEJEFERIqYJIiI\nSBGTBBERKWKSICIiRUwSRESkiEmCiIgUMUkQEZEiJgkiIlLEJEFERIqYJIiISBGTBBERKWKSICIi\nRUwSRESkiEmCiIgUabp8qcvlgs1mg9FoRGlpKbKzs6fc3+Px4MUXX8SuXbvk5UsjPQYpczqdADBp\n2VgiIiWaPkm0t7ejuroaO3bsQFtbW9j9m5qasGnTpmkdg5TV1taitrZW7zCISCCaJgmLxSKXU1JS\nptzX7XbDYrHAbDZHfQxS5nQ6cfz4cRw/flx+oiAiCkfTJCFJklwOd4FvaWnBXXfdNa1jkLLAJwg+\nTRCRWpq2SYyNjanet6+vD++//z76+vpgtVrlNolIjuGXnp4e8WdmOpPJFFTmOSIiNTRNEh6PJ2TZ\n4XDAYDAENaA++uijAMZfi8ybNy/sMabidrujjnmmqqysRFdXl1zmOSIiNTRNEiUlJaipqYEkSdi4\ncaO83W63T0oSADA4OIiWlpagJwmlY1BkioqKsGTJErlMRKSGQQp86T8D2Gw2rFy5Uu8wEhK7wBKR\nks7OTpSVlU3arumTBCUWJgciihRHXBMRkSImCSIiUsQkQUREipgkiIhIEZMEEREpYpIgIiJFTBJE\nRKSISYKIiBQxSRARkSImCSIiUsQkQUREipgkiIhIEZMEEREpYpIgIiJFTBJERKSISYKIiBRpuuiQ\ny+WCzWaD0WhEaWkpsrOzFfe12+3o6+tDcnIylixZgvz8fADA/v37MTY2BgDIz89HcXGxliETEVEA\nTZNEe3s7qqurAQD79u1DZWWl4r5r166Vyw0NDXKSMJvNXNuaiEgnmiYJi8Uil1NSUsLu39PTg3fe\neQcFBQXyNq/Xi/r6egCA1Wrl+tVERHGkaZKQJEkuq0kSVqsVTz31FIaGhuRt5eXlcrmxsTG2ARIR\n0ZQ0TRL+toRImEwmHDt2DOvXrw/5OzXS09Mj/l4iIppM0yTh8XhClh0OBwwGA4qKiuRtg4ODyMjI\nAABcvHhR3t7T0wOr1QoAGBkZUfW9brd7OmETEdG/aZokSkpKUFNTA0mSghqf7Xb7pCTR2tqKkZER\neL1eLF26VN7e29uLjo4OGAwGrFixQs
twiYhoAoMU2HAwA9hsNjZuExFFqLOzE2VlZZO2czAdEREp\nYpIgIiJFTBJERKSISYKIiBQxSRARkSImCSIiUsQkQUREipgkiIhIEZMEEREpYpIgIiJFTBJERKSI\nSYKIiBQxSRARkSImiVnE6XTC6XTqHQYJiHVHX3qe/6iTxOjoaCzjoDiora1FbW2t3mGQgFh39KXn\n+Y86SXC9abE4nU4cP34cx48f5x0hRYR1R196n/+ok4TP54tlHKSxwLsQ3hFSJFh39KX3+Q+7fOmv\nf/1rLFy4EJIkwWAwAAAkScKJEyewdevWKT/rcrlgs9lgNBpRWlqK7OxsxX3tdjv6+vqQnJyMJUuW\nID8/P+JjEBGF4r8DD1wymdQJ+ySRl5eHbdu24Qc/+AG2bdsml4uLi8MevL29HdXV1dixYwfa2tqm\n3Hft2rW47777sGXLFpw4cSKqY5Cybdu2hSwThTMT6o7IbSp6n/+wTxJ33HFHyO3z5s0Le3CLxSKX\nU1JSwu7f09ODd955BwUFBVEfg0IrKirCkiVL5DKRWqLXHf87fX9ZtH+D3uc/bJK48cYbQ24vLS0N\ne3BJkuSymgu81WrFU089haGhoaiPQcpEvQsk/Ylcdya+03/22Wd1jCY6ep7/sEkikMfjgcvlwk03\n3YTU1NSw+4+NjUUekMmEY8eOYf369VEfIz09PeLPzAZ33nmn3iGQoESuOyaTKags4vVBz/OvOkl0\nd3ejq6sLt956Kzo6OrBs2TIUFhZO+RmPxxOy7HA4YDAYgh6dBgcHkZGRAQC4ePFi2GNMxe12q9qP\niGa+yspKdHV1yWVeHyKjOkkcOXIEO3bsAAAsX74ce/fuDZskSkpKUFNTA0mSsHHjRnm73W6flCRa\nW1sxMjICr9eLpUuXhj0GEZEaer/TF53qJDGxPcBsNof9TE5ODnbu3Dlp++7duydtu+eeeyI6BpFI\n2AVTX6tXr9Y7hGlpaGgAAFRUVMT9u1UPpvN6vfIAOp/Ph+HhYc2CIpppRO6CORMcPnwYhw8f1juM\nqNXV1aGurk6X7w77JOGv2ENDQ3jmmWdQXFyMrq4uLFq0SPPgiGYC0btgik7089/Q0ICrV6/K5Xg/\nTYRNEmlpaSgvL5+0nXM3EakzE7pgikz08x/4BFFXVxf3JBH2dVOoBDFx+5EjR2IXERElHE4VPnvF\nZD2Jc+fOxeIwRDOS3tMqxILIbSqin//vf//7IcvxEtFgOiWBo6KJKJjoXTBFf6cv+vmvqKiQXznp\n0bspJknCPzssEYUm4h2sn+jv9AGxzz+gzxOEX0ySBBFNTcQ72JlE9POvxxOEH9e4JqIpif5On6aH\nbRKzCEf96kfkcy/6O/2ZQM/6E5MnieXLl8fiMKQxkXuoiE70c+9fcIz0oWf9iTpJ+GdVBMbXgaDE\npvdi6rPZTDj3RUVFfIrQid71R3WS+OSTT4J+Pnv2bMyDIe3ovZj6bMZzT9Ohd/1RnSQGBwe1jIOI\niBKQ6iQxcSzEyMhIzIMh7bCHin547mk69K4/YXs37d+/H9euXcOJEycwOjoKSZJgMBiwbNmyeMRH\nMcIeKvrhuafp0Lv+hE0SW7ZsATDeJuFfd1otl8sFm80Go9GI0tJSZGdnK+7rdDrR3d0Nn8+H22+/\nHbm5uQDGk5R/nev8/HwUFxdHFAN9g3ex+uG5p+nQs/6oHicRaYIAgPb2dlRXVwMA9u3bh8rKSsV9\nBwYGUFVVBWB8znR/kjCbzVy2NEZ4F6ufnp4eAOL+PxB5nAcAPPfccwCA559/XudIoqNn/dF0Wg6L\nxSKXJy5/OpFSEvJ6vaivrwcw3tV25cqVsQuQKE70nKAtFvy9akSctwkATp48qXcI06Jn/VHdcP36\n66/j/Pnz6OjowFtvvYWWlpawnwkciR0uSfgdOHAgaD3a8vJyVFVVoaqqCv39/WrDJUoY/pXFrl69\nKq
9VLBK9++lPl/8pYmJZFHrXH9VJYv78+Vi4cCH6+vpw//33q7pg+9sS1GpqakJBQQGysrJC/t5k\n4nyEJJ6JK4uJRu9++tMV+BQh4hOF3vVH9VXX/1Tgv1AnJYXPLx6PJ2TZ4XDAYDAEvV9rbm5GVlYW\n8vLygo7R09Mjj+hW2+02PT1d1X5E8RDYfdxgMAhXPwNvzkwmk3DxTyRa/HrXH9VJ4quvvkJvby/m\nz58PQF2SKCkpQU1NDSRJCmp8ttvtQUmiv78fzc3NWLx4Mbq7u3HlyhU89NBDAIDe3l50dHTAYDBg\nxYoVqmJ1u91q/1lEmquqqsKePXvksmj1s7KyUp6Gp7KyUrj4CwsL5SeIwsJC4eLXu/4YJJVTuJ47\ndw4OhwMVFRU4cuQI2tra8Nhjj2kdX8RsNhsbtynhPPLIIwCAP/7xjzpHEp2f/OQnAIBf/OIXOkcS\nnZ07dwIAampqdI4kOvGoP52dnSgrK5u0XfWTxKJFi7Bo0SIA47O+zsaZX0XvBig6kc+/niuLxcJX\nX32ldwjTkpmZqXcI0+IfTKeHmEwVfvDgwVgcJuGJPt2z6EQ+/xUVFcJ2f3U6nXC73XC73UL2bgIg\n9w4Slb93mR5ikiSuXLkSi8MkNNG7AYpO9PPf0NAgZPdXAPjd734XsiwKvbuQTpfe8cckSUyc/G8m\nEr0boOhEP/91dXVCdn8FxmdDCFUWhd5dSKdL7/i5xrVKw8PDIcsUHyKff73vBKcrOTk5ZFkUgeO1\nIh27lQgCu/7rMfs2k4RKE/sqU3yJfP71vhOcru3bt4csi2LevHkhy6Lw+Xwhy/HCJKFS4DxUgWWK\nD55//VRUVCA1NRWpqalCNr5nZGSELJM6TBIq6b3wRyw4nU4hG30Bsc9/YPdXUbvCBnaBF43IdQfQ\n/0ko6iRx7tw5uRxqAMZM41/4Y8mSJUL20wfE7kJaVFSEpKQkJCUlCXf+A+++RbwTB4BTp07h1KlT\neocRlcD6IlrdARC0Ds9Ua/JoJexgOv803cA378MkScKpU6fw05/+FACQlpamUXiJRcS7ED9/F1J/\nWbQ/loaGBrn+NTQ0CHWxfe2114LKDz/8sI7RRO61116Tz72o8QeWRYs/NzdX/tv1r7MTT2GfJK67\n7jpUVVXhxhtvxMKFC3Hvvfdi4cKF+M53vhOP+BJKUVGRcBdXv5nQhTRUWQQ2my1kWRSMX1+tra0h\ny/ESNkls2rQJwPhSpCUlJUhJScGaNWtmxQA6IgpeF0blVG8JRfT49aa6TWJit0M9umJR9ERvvBO5\n8TewzU7E9ju9G06nS/T49a77qpPE2NiYPNpyYGAAX3/9tWZBEU3kX1NkYlkEejc8TtcNN9wQsiyK\n66+/PmRZFBUVFTAYDDAYDIm9fOl9992HtrY2vP3222hraxPybnQ2E71NQuT4RW5PAYChoaGQZVEE\nzmAr4my2TqcTkiRBkiRdurCrnircZDJhy5YtWsZCRAno8uXLIcuiED3+v/zlL0Hln//853H9ftVP\nEnq0qlPsiN4mIXL8er9Tni7R524SPf5Lly6FLMeL6iQxcfbHw4cPh/2My+XCm2++ibfffhsul2vK\nfZ1OJ+rr61FbW4sLFy5EdQxSJvpgQJHjr6iogMVigcViEWp8h5/oczeJHn/ggkl6LJ6kOklcu3Yt\nqLH6888/D/uZ9vZ2VFdXY8eOHWhra5ty34GBAVRVVWHbtm04evRoVMfQmsjTWgDjd+Ci3YUH8l9o\nRZSRkSHsvEGijxgXPf5du3aFLMeL6jYJq9WKV199FRs2bAAA9PX1hf1M4B90SkrKlPuuX79+2sfQ\nmr/B9Nlnn9U1jmiJdgc+0WeffaZ3CFELfDomioTe04qofpI4evSonCDUChy4ovYCf+DAAaxevXpa\nx9CC6Cujic4/NYTP5wuaZkEEv/rVr0KWRfHcc8+FLIviySefDFkW
xSOPPBKyHC+qk8T3vvc9eVqK\noqIiLF++POxnIl3go6mpCQUFBcjKyor6GFoRuQvmTCDy1AqffvppyLIoTp48GbIsCtFX1gtcm1uP\ndbojet0UqKSkJOxnPB5PyLLD4YDBYAh6dGpubkZWVhby8vJUHWMq6enpqvaLhMlkCipr8R2knsjn\nX+TYAcavt3jHrzpJTHTu3Lmw88uXlJSgpqYGkiRh48aN8na73R6UJPr7+9Hc3IzFixeju7sbV65c\nwUMPPTTlMabidruj/Fcpq6ysRFdXl1zW4jtIWVlZGT766CO5LNL5X7VqlfwEsWrVKqFiB4DCwkL5\nCaKwsFC4+DMzM+UniMzMTOHiT01NlZctTU1NjXv8qpPEF198gebmZnnOpu7ubjzzzDNTfiYnJwc7\nd+6ctH337t1BP2dlZSkeS+kY8ebvgukvi8jfliJi/A8//DAOHjwol0VSUVEhJwkRe9esWbNGThJr\n1qzROZrI/ehHP8LPfvYzuSyauXPnor+/Xy7Hm+o2iQ8//BCbN29GcnIytmzZgsWLF2sZV0ISvQup\nyIsOAeOTs4k4QZvo7VmiTysi+vnXu01F9ZNEWloakpOTIUkSUlJSkJQ0+1Y+FfEO3E/0RYecTqf8\nmC1a/KdPnw5ZFsW1a9dClkWhd8PvdBmNRvkNjtFojPv3q77Se71eAJAH1E2cOnw2EHkwneh3UyLH\nHzgIVcTZk0Vfj0H0+AMHYeoxIFN1kvC/XiouLsbevXtV9zSaSUR/XUM0G1133XUhy6LQO0moft1U\nXFwMYDxZWK1WmM1mzYJKRKK/rgkcb5IoY08iofc6v9NhsVjk1xwiTiuSlJQkv+4Q8TWz6HX/1KlT\nIcvxIt7/cZ2I/LoDEH9AlN7r/E5HNGN9EkngKpQirkgpet0fHR0NWY4X1Uni5Zdfht1uBzA+Xe3H\nH3+sWVBEE4l8Nyj6RZZmN9VJIicnB2vXrgUA3HzzzRgcHNQsqEQk8noGwPggrlBlUQT2D9ejrzjR\nbKU6SUx8Fxk4TcVsIPJ6BgDw9NNPhyyLIrANbLa1hxHpSfWV3t8F1k/ErnzTJeITRCA9FiyJlcAu\n16J1v05LS8OVK1fkMlEk9O44oDpJLF26FHV1dcjLy8Pp06dx2223aRlXQhLxCSKQyK8IA3sFidZD\n6Oabb5Z7Zt188806RzP76H2RnS6z2Sz3jtPjKVr1GSsoKMDmzZsxd+5cbN68WZ7HiMQg8noMAPDV\nV1+FLItA5NhnAtE7Dug9YjyitGo2m5GXl4c5c+ZoFU9CE3nEtcjrMQDBK7uJtsqbyLETqU4StbW1\n+Pvf/w5gvK+xf9rs2YQjrolotonoSeKOO+4AMD6n/IkTJzQJKFGJvnxpWVlZyLIoCgsLQ5ZFIHLs\nRKqTxMQGEz1mI9ST6COus7OzQ5ZFcf3114csE810gddaIWaB9dNiNkWfzyfcaFpRiL4mgMjrRIs+\nLQTpS+/ZBlR3gc3KykJHRweWLVuGo0ePqlpn1eVywWazwWg0orS0dMo72MbGRpw9exZbt25FTk6O\nvH3//v3yicnPz5cnGoy31atXy90YV69erUsM06H3/C9EJCbVSWLt2rU4efIkDhw4gPz8fFUXyvb2\ndlRXVwMA9u3bh8rKSsV9y8vLQ77rN5vNqte21tLhw4eDyiIuQ0lEFKmI5tYoLCyMqOEtcNBTSkpK\nJF8l83q9qK+vBwBYrVasXLkyquPMdqL3FScifUQ9/FDNe+HAdotok0R5eTmqqqpQVVUlLwauB9En\n+GOSIKJoqH6S6OrqwvHjx/Hll1/ihhtuQE9PT9jZRGPdyKJ2UkE17SWRuvPOO5GXlyeXRTNxCUct\nzpGWJk6tIFr8gUSOHWD8eot3/KqTRHd3N7Zv3473338f99xzD959992wn1FabMXhcMBgMKiaC6mn\npwdWqxUAMDIyoipWt9utar9I
+Xt4aXV8LZlMJjl+k8kk3L+hrKwMH330kVwWLf5AIscOMH69xTt+\n1UkiOTkZwDevKvw/T6WkpAQ1NTWQJCmo8dlut09KEgcPHsSZM2cwZ84cWK1WrFu3DgDQ29uLjo4O\nGAwGrFixQm24Med0OnH+/Hm5LNpkf6K/bvrHP/4RVH744Yd1jIZo9lCdJPxTg4+NjcHn86kaJ5GT\nk4OdO3dO2r579+5J25R6MPmThd4mDqZ79tlndYwmcqIniaGhoZBlItKW6obr0tJSAOMX7bfeeour\ngxERzQKqk0RWVhYAYMGCBXjggQewYcMGzYJKRKL3bhJ9ZbfAdQBEXBOASFT8a1OpqKgIqampSE1N\nFa49AhD/Iit6kiMSVdg2iUOHDmHVqlX4wx/+gIULFwb9zuv1oqCgYFYMcHM6nXLvKhEbrjMzM+WG\ndxGXMb127VrIMhFpK+wtpb+B2mq1yoPa/P9t374dZ86c0TzIRPDnP/85ZFkUu3btClkWxcRxHkQU\nH2GfJPwN1kqLz6sd4Ca6gYGBkGUioplM9cvpefPmRbR9plmwYEHIsihEXw+DiPShOkncfffdIbf7\nnzRmOtFXFwtcSXC2rSpIRNETr5uLTlpbW0OWRSH6YDoi0geTBBERKWKSUClw5lcRZ4ElIooGk4RK\nFy5cCFkmIprJmCSIiEgRk4RKos/dREQUDSYJlfbu3RuyTEQ0kzFJqHTy5MmQZSKimSyhkoTP54v5\nuthERBQ9TSdecrlcsNlsMBqNKC0tRXZ2tuK+jY2NOHv2LLZu3YqcnJyojkFERLGl6ZNEe3s7qqur\nsWPHDrS1tU25b3l5ecipPyI5BhERxZamScJiscjllJQU3Y5BRETR0TRJBM77H+0FPhbHICKi6Gja\nJhGLRuhojpGenj7t750oKSlJnhgvKSlJk++IJ8avH5FjBxi/3uIdv6ZJwuPxhCw7HA4YDAZVS4Aq\nHWMqbrc7gijVMZvNuHr1qlzW4jviifHrR+TYAcavt3jHr2mSKCkpQU1NDSRJwsaNG+Xtdrt9UpI4\nePAgzpw5gzlz5sBqtWLdunVTHiPe/AliYpmIaCbTNEnk5ORg586dk7bv3r170jalBKB0DCIi0l5C\nDaYjIqLEwiRBRESKmCSIiEgRkwQRESnStOGaaLb48Y9/rHrFwnAdMXJzc/HCCy/EIizVRI+ftMMk\nQRQDU10UAy/AiXoBnSqm5557Tp4ev7CwEM8//3y8wqIEwCRBpLEXXnhBvvtOxAQRzvPPPy/Hn4gJ\nIpKnIIBPQpFikiCKg9zc3IguZImmsLAwYRfbCndBr66uDppS580334xHWDMGk8QMEcu7qUR/Jw4k\nXvzhBD5NiCjwaUI0b775phw7E0TkmCRmiHAXxQceeECeLNFoNOKNN96IR1iqqbmo+//Qa2pqtA6H\nZpjACTopMuwCO0sEJoVESxBqFRYW6h0CCYpPENHjk8QsYjQahV5DXORXHkRKEv1VK5PELPLGG2/w\nIkuUYMJd1Cf+zcb7dStfNxERJbDApKBHexyTBBERKeLrpn9L9PeCRDR71dTU6PaqmEni39gFk4ho\nMk2ThMvlgs1mg9FoRGlpKbKzsyPed//+/XKPnPz8fBQXF2sZ8pRWrVqFTz/9VLfvJyKKN02TRHt7\nO6qrqwEA+/btQ2VlZcT7ms1mXde2DvT000+zdxARzSqaJgmLxSKXU1JSotrX6/Wivr4eAGC1WrFy\n5coYR0lEREo0TRKSJMnlcElCad/y8nK53NjYGMPoiIgoHE2TRCSje9XsazKpCzc9PV3190ZD6+Nr\njfHrR+TYAcavNz3i1zRJeDyekGWHwwGDwYCioqKw+/b09MBqtQIARkZGVH2v2+2ONuSEOL7WGL9+\nRI4dYPx60yN+TZNESUkJampqIElSUOOz3W6flCSU9u3t7UVHRwcMBgNWrFihZbhERDSBpkkiJy
cn\nZG+g3bt3q9533bp1msRGREThcVoOIiJSxCRBRESKmCSIiEgRkwQRESniBH9EU3jsicdxeTB23Q5j\nNa3L3Ix0vPLSy2H3e+yJ/8LlwS9i8p1ALOOfj1de+p+YHIu0xSRBmnr8icfgHrwc02PG6kKVnjEX\nL7/0ypT7XB50w/jD8in30cPlV9XNPnB58AukPvp/NY4mcpf/8KKq/Z544kkMDg7E7HtjVXcyMjLx\n0ku/jcmxEh2ThAD+64nH8EUML7Sx+kOZnzEX/xPmIusevIz/+E9pyn308r+vxzZ5UewNDg5g5w/3\n6B3GJDWvPqh3CHHDJCGALwYv4/99P/H+V/13HS+yRFN58vEnMeBOvCehzPRM/PZldU9CiXfl0cCT\njz+BAfdgzI4Xu/9RGfjtyy/F5FhElHgG3AN4vex3eocxyX/a/o/qfWdFkhhwD2LP1gf0DmOSB997\nQ+8QiIimxC6wRESkiEmCiIgUMUkQEZEiJgkiIlI0KxquiaZjTOXAtUQ1onLgGlEoTBJEYSTiiOtI\nElcijriOJHHNpoFriUjTJOFyuWCz2WA0GlFaWors7OyI943kGJSY/vd1g94hkMA44lpfmiaJ9vZ2\nVFdXAwD27duHysrKiPeN5Bgz2X/XefUOIWqJOy0HkxdROJomCYvFIpdTUlKi2jeSY8xkiTkth7iJ\ni4jU0bR3kyR9cwcZ7gKvtG8kxyAiotjSNEmMjY1Ne99IjkFERLFlkAJv1WMssA1h//792LJlCwDA\n4XDAYDCgqKgo7L5K25XYbLaY/zuIiGaDsrKySds0TRIXL17EoUOHIEkSNm7ciKysLADA73//exgM\nBjz66KNh91XaTkRE2tM0SRARkdg4LQcRESlikiAiIkVMEiH4fD7helWJGHMg0eMnmqkSb4SWzhob\nG3H27Fls3boVOTk5U+6bKFOGRBKz3W5HX18fkpOTsWTJEuTn50+5PR4iiR8Aenp6cOTIERiNRpSX\nlyM5ORkvvPACCgsLIUkSvvjiCzzyyCNxiHwyp9OJ7u5u+Hw+3H777cjNzVXcN1HqTyQxJ2L9iSR+\nILHrTyTnMW71R6JJHA6H9Pnnn4fd77333pPLf/3rX7UMKSy1MQf64IMPItquJbXxf/nll5LD4Zi0\n/dq1a3K5sbExprFF4tChQ3I53HlMlPoTScyBEqX+RBJ/otefQIlSf/i6aRpEnTKkp6cHv/zlL+Hx\neFRtTyQOhwMejwf19fU4efKkvN1sNgMALl26hAULFugVHtavX69630SpP5HEDCRe/Ykk/kSvP4D6\n8xiv+sOGwt27AAAFGklEQVTXTdMgCTpliNVqxVNPPYWhoSFV2xPJuXPncOONN6KqqgoffPABCgoK\nkJT0zb2Ow+HAHXfcoWOE4w4cOIDVq1dPuU+i1R81MQOJW3/UxC9C/VF7HuNVf/gkMQ0iN7SaTCYc\nO3ZM9fZEYTKZUFpaCgD41re+BbfbHfR7j8eD1NRUHSL7RlNTEwoKCsIO/Eyk+qM2Zr9Eqz9q4xeh\n/gDqzmO86g+ThEoOhwNOpzNoW+DjYCK+ogkV8+DgoFy+ePFi2O16ChX/rbfeijNnzgAA3G435s6d\nK//O5/MF3RXqobm5GVlZWcjLywvansj1J5KYE7H+RBJ/otcfpfOoZ/3h66YJDh48iDNnzmDOnDmw\nWq1Yt24dgPFeBxPnmyopKUFNTY08ZYheIom5tbUVIyMj8Hq9WLp0adjtiRb/8uXLUVtbi2PHjiEz\nMxMm0zdV+Pz587j11lvjGnug/v5+NDc3Y/Hixeju7saVK1fw0EMPAUjc+hNpzIlWfyKNP5HrD6B8\nHvWsP5yWg4iIFPF1ExERKWKSICIiRUwSRESkiEmCiIgUMUkQEZEiJgkiIlLEcRJEKh06dAgXLlxA\nbm4u7r77br3DIYoLJgkile6++24MDAzA4XDoHQpR3DBJ0K
xz+PBhfPjhh3jwwQdht9thsVhQVFSE\nzMxMHDhwABaLBV6vFzk5Obj99tuDPqs09vSTTz7BpUuXkJycjJGREZSXl+P6669HV1cX6uvrUVZW\nhrvuukv+ecOGDVi3bh36+/vR1NSElJQUjI6OYvHixbjtttsAAB0dHThw4ADy8vKQlpaG0dFRVFVV\nyd956tQpHD16FCaTCT6fDxcvXsTjjz8OADh58iSOHDkCs9mMkZERrF69GrfccotGZ5RmNM0mISdK\nYO+8886kdQP+9Kc/SaOjo/LPra2tUk9PT9A+/f39UlNTU9A2p9MptbS0yD9//fXX0rvvviv//Mkn\nn0gXLlyQf1dXV6cY1969e4N+fuWVV+TyZ599Jv3zn/+UJGl8/YPa2tqgfVtbWyVJkqTh4WFpz549\nQb979913g/5tRGrxSYJmJYPBgE2bNgVtc7lc+OCDD+SffT4fkpOTsWjRoimPdfr0aYyMjOC9994D\nMP60MTw8LP9+7dq1qKurw/bt29Ha2oq77rpL/t2lS5fQ3NwMo9GIpKQkfPnll0HHnj9/vlw2m834\n+uuvAYxP/jZxQru1a9cCAPr6+jA0NBQUj8fjweDgoOpZXon8mCRoVpJCvDYqLCzE5s2bgyZ983q9\nYT9722234V//+lfQq6nAzxmNRqSlpWFoaAiXLl0KulD/7W9/w/333y/PPvr222+HjRMAbrrpJnR2\ndmLFihXytt7eXixYsAC5ubnIzs7G1q1b5d/5fL6QxyEKhxP80azT0NCAzs5O+d3/smXLYLVaMTw8\njI8++ghGoxGSJOHq1atYs2YNbrnlFrhcLrS1tWF4eBj9/f3Iy8tDQUEBvv3tbwMAOjs7cf78eSQl\nJWFsbAwGgwGVlZXydw4PD+M3v/kNtm7dGjST58cff4zLly/DYDBgdHQUp0+fxne/+12sWrUKTU1N\naGlpwQ9/+ENkZWVhz5498Pl82LVrFwDg+PHj6OrqQkpKCrxeLzIyMrBhwwYA4wmjo6NDTnhXrlzB\nvffei7S0tLicY5o5mCSIiEgRB9MREZEiJgkiIlLEJEFERIqYJIiISBGTBBERKWKSICIiRUwSRESk\niEmCiIgU/X+CzEIPNawX8QAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x160b020f0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='relevance', y='jaccard_st_bl', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Edit Distance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from nltk.metrics import edit_distance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def calc_edit_distance(row, col):\n",
    "    dists = [min([edit_distance(w, x) for x in row['tokens_'+col]]) for w in row['tokens_search_term']]\n",
    "    return (min(dists), sum(dists)) if dists "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df['edit_dist_st_pt_raw'] = df.apply(lambda x: calc_edit_distance(x, 'product_title'), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['edit_dist_st_pt_min'] = df['edit_dist_st_pt_raw'].map(lambda x: x[0])\n",
    "df['edit_dist_st_pt_avg'] = df['edit_dist_st_pt_raw'].map(lambda x: x[1]) / df['len_search_term']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x163222cf8>"
      ]
     },
     "execution_count": 194,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEPCAYAAACukxSbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X1wVOX9NvBrd/NGiIUsYcWQwUADDS+2qaE2oggV0DRo\nw0TaEggUA2LHPp3O0ynTGZ+xTsfpOFOc+c201dHOr7RTqEElEURiDESDgYgKEYVsIkISNdC8kQ2Z\nJCab7O7zRybrLizs2bPnnHvvnOvz18lhs+fyeO8399579nssPp/PByIiMgWr6ABERGQcFn0iIhNh\n0SciMhEWfSIiE2HRJyIyEd2Lvtfrhcfj0fswRESkgK5Fv6qqCi+99BI6OzuD9g8PD+PZZ5/F5cuX\n9Tw8ERFdw6L3dfpOpxPTp09Henq6f19VVRUcDgdmzZoVtJ+IiPRl+Jq+y+VCcnIykpKSjD40EZHp\nGV70jx8/jnvvvdfowxIREYA4ow/Y2dmJQ4cOobOzE5mZmVzeISIykOFFf/v27QC+WesPp6amRu9I\nREST0qpVq67bp2vRP3r0KFpaWjBlyhRkZmZi+fLlAIDe3l4cP35c8Uz/zjvv1DMmEdGk09DQEHK/\nrkV/9erVIffb7Xbs2LFDz0MTEVEI/EYuEZGJsOgTEZkIiz4RkYmw6BMRmQiLPhGRibDoExGZCIs+\nEZGJsOgTEZkIiz4RkYmw6EvM6XTC6XSKjkESkn3syJ5fJBZ9ie3fvx/79+8XHYMkJPvYkT2/SCz6\nknI6nWhqakJTUxNnPBQR2ceO7PlFY9GXVOAshzMeioTsY0f2/KKx6BMRmQiLvqTWr18fcpsoHNnH\njuz5RTP8zlmkjUWLFmHhwoX+bSKlZB87sucXjUVfYpzlkFqyjx3Z84vEoi8xznJILdnHjuz5ReKa\nPhGRibDoExGZCIs+EZGJsOgTEZkIiz4RkYnoXvS9Xi88Ho/ehyEiIgV0vWSzqqoKra2tKCwsRHp6\nOoDxZknNzc3wer3Iy8tDRkaGnhEmtYlmU7x8jSLFsWNeus708/PzsWLFiqB9PT09KCoqwvr16/Hp\np5/qefhJj+1lSS2OHfMyfE3/vvvuM/qQkxLby5JaHDvmJuyD3OrqaixdulTU4aXH9rKkFseOuQlp\nw1BbW4usrCw4HA5Fj09NTdU5kXzi4uKCtnmOSCmOHXMzvOjX1dXB4XBg3rx5in/H5XLpmEhO69at\nw9mzZ/3bPEekFMeOuela9I8ePYqWlhZMmTIFmZmZ+M53voO6ujpkZ2ejubkZAwMD2LJli54RJi22\nlyW1OHbMTdeiv3r16uv2Pfnkk3oe0lTYXpbU4tgxL7ZWlhhnaaQWx455sQ0DEZGJsOgTEZkIiz4R\nkYmw6BMRmQiLPhGRibDoExGZCIs+EZGJsOgTkXScTic7hKqkuOgPDg7qmYOISDHeD0A9xd/Iffnl\nl3HPPfcE7Zs2bRpmz56teSgiohuZuB/AxDa/XRwZxTP9qVOnoqGhAW63Gw0NDTh//jy++uorHD58\nWM98RERBeD+A6Cgu+nFxcSgpKUFOTg5KSkrgdruRl5eH4eFhPfMREZGGFBf9xMTEoJ8TEhIAADab\nTdtEREQ3EdghlN1CI6d4TX9sbAx9fX2YPn06+vr6MDY2BgC48847dQtHRHQt3g8gOoqLfmFhId5+\n+218/fXXSE5ORmFhIQBgzpw5uoUjIgqFM3z1FBd9i8WCtWvX6pmFiEgRzvDVU7ymv2/fPj1zEBGR\nARQX/e7ubuzbtw/vvfeefz2fiIjkonh55+c//zluu+02dHR04NChQ0hISOByDxGRZBTP9G+99VYA\nwKxZszBnzhx0dHToFoqIiPSheKb/+uuvIzU1Fd3d3cjNzcW2bdv0zEVERDpQXPTPnz+P7du3Y+bM\nmXrmISIiHSle3tm6dauqgu/1euHxeC
L+PSIi0p7imf5tt90W9PMXX3yB22+//aa/U1VVhdbWVhQW\nFiI9PR0A0NHRgZqaGthsNqxcuRKzZs1SEVsbE/24ec0vRYpjRyyef/UUz/SvXLmCAwcOoKKiAhUV\nFfjPf/4T9nfy8/OxYsWKoH0ffvghNm3ahA0bNuDkyZORJ9YQe3KTWhw7YvH8q6e46L/11ltYu3Yt\n4uPj8dBDDyE7O1vVAZOTk/3bE03bRJjoyd3U1MQ78FBEOHbE4vmPjuKin5KSgvj4ePh8PiQkJMBq\nVXenRZ/P598WWfTZk5vU4tgRi+c/OhF12QQAt9sNYLwXjxpqPtRNTU1VdaybiYuLC9rW4xg0OXHs\niMXzHx3FRX9iOWfJkiV45ZVXVB8w8KYrSm/A4nK5VB/vRtatW4ezZ8/6t/U4Bk1OHDti8fxHR3HR\nX7JkCYDx4p+ZmYmkpKSwv3P06FG0tLRgypQpyMzMxPLly3HXXXehrKwMPp8Pq1evVp88SuzJTWpx\n7IjF8x8dxUU/0LUF/+jRoyELeKh96enpKC4uVnNYzbEnN6nFsSMWz796qor+tQYGBrR4GsNxlkBq\nceyIxfOvnrpLcK6h9kNdIiIyliZFn4iI5MCiT0RkIiz6REQmwqJPRGQimhT9VatWafE0RESkM8VF\n/9qbob/zzjv+7ZSUFO0SERGRbiLqshlI1mvzJ5PKykpUVlaKjqGa0+mUtkuizNkB+fPv3r0bu3fv\nFh1DNZGvXcVFn3e/ij3l5eUoLy8XHUM1mXuiy5wdkD9/TU0NampqRMdQTeRrN2zR7+vrQ3d3N4aG\nhtDT04Oenh50dHRwpi9YZWUlhoaGMDQ0JOVsX+ae6DJnB+TPv3v3bni9Xni9Xiln+6Jfu2GLfmNj\nIxobG9HR0YHGxkacO3cOFy5cwMMPP2xEPrqBwFmCjLN9mXuiy5wdkD9/4Axfxtm+6Ndu2N4799xz\nDwDA4XCw3wURkeQUr+mz4MeWRx55JOS2LAK7JMrWMVHm7ID8+QMvEZfxcnHRr92I7pxVVVWFkZER\nTJ06FWvWrIHNZtMzG91EQUGB/61hQUGB4DSRk7knuszZAfnzl5aW+pd1SktLBaeJnOjXruKiX1FR\ngdWrV8Nut+PKlSs4ePAgioqK9MxGYcg4ww8k4yxzgszZAfnzyzjDDyTytau46MfFxcFutwMAZsyY\nofrG6KQdGWf4gWScZU6QOTsgf34ZZ/iBRL52FVdun88X9PPo6CgAoLW1VdtERESkG8VFPzExESdP\nnsTIyAg+/PBDOBwOAMCnn36qWzgiItKW4uWdjz/+GNnZ2fjqq6/8+1577TV89tlnKCws1CUcERFp\nS3HR37x5MzIzM6/b39bWhuHh4etulk5ERLFH8fJOqII/sb+6ulqrPEREpCNNLsHxer1aPA0REelM\nk6JvsVi0eBoiItKZ4jV9LbW1teHUqVPwer3Iy8vDnDlzRMTwd+iT9ZrfiQ59sl6vL/P5f+655wAA\nv/vd7wQnUUfmcw8AO3fuBADs2rVLcBJ1RL52hRT9pqYm/zcCDx8+LKzoy/xVbgBSt2EA5D7/p0+f\nFh0hKjKfewBob28XHSEqIl+7Qr5W6/F4/P2wRV31w57cYsl8/idm+dduy0Lmcw98M8u/dlsWol+7\nmhT9xMTEiB6/ZMkS7Nq1C8899xzmz5+vRYSIsSe3WDKf/8BZvowzfpnPPRA8y5dxxi/6tat6eWds\nbAxxceO/np+fH9HvfvLJJ/j9738Pn8+H8vLysMs7qampamMqZsQxtBT44bnFYpEu/7Vkzi9zdoD5\njSb6tau46Hd1dflbLwDAG2+8obrL5sQ7A4vFgm9961thH+9yuVQd52ZWrVqFI0eO+Lf1OIaeioqK\nsGfPHv+2bPllPv+5ubn+GX5ubq5U2QG5zz0AZGRk+Gf4GRkZ0uUX/dpVvLzz/vvvB/0cHx+v+qCZ\nmZ
moqKhARUUF5s6dq/p5olFaWgqr1Qqr1Srlh1kFBQVITk5GcnKylB/klpaWwmKxwGKxSHf+A6/Y\nkfHqHdnHfuAVOzJevSP6tat6eefarpuRyM7ORnZ2turf1wp7cos1ffp00RFUy83NFR0hKrKP/YyM\nDNERoiLytWvxhane9fX1cLvd+Pjjj3HnnXcCGF/P7+vrMyR4TU2N/7g0eTidTjzzzDMAgKeeekr6\n/u5EsaahoSHkH/ewM/3s7Gx4PB5cunQJixcvhs/ng81m899QhUiN/fv3B23/4Q9/EJiGyDzCFv2J\n4r5mzRqkpaXpHoiIiPSj+IPcwCt3iKIVeI9W2e/XSiQTxUX/4sWLGBgYwMDAAN566y309/frmYsm\nuUWLFmHhwoVYuHAh1/OJDKS46L/77rtwu904cuQI7r77bhw+fFjPXGQC69ev5yyfyGCKL9m89dZb\nYbfbMWXKFEyfPh3Jycl65iIT4AyfyHiKZ/qjo6PweDz+1gvRXKdPRERiKC76VqsV//73v5GTk4Oe\nnh4MDAzomcsQjz/+OB5//HHRMVTbuXOnlF0GJ2zbtg3btm0THUOVkpISlJSUiI6hWnFxMYqLi0XH\nUO3pp5/G008/LTqGaiJfu4qXd9atWwev1wur1Yr29vZJ8dZc9g+jZewwGGhoaEh0BNU8Ho/oCKZ2\n/vx50RGiIvK1G1FrZat1/OEZGRlB35KVsfgEzvBlnO3L3lM8cIYv22w/cIYv42w/cIYv42w/cIYv\n42xf9GtXk376MvYUD5zlyzjjl72neOAsX7YZf+AsnzN+4wXO8mWc8Yt+7WpS9PmhLhGRHDQp+oE3\nBZBFYB9/JT39Y01gl0EZOw4GXvIr2+W/Npst5DYZY8GCBSG3ZSH6tSvkHrmx4KWXXgq5LQvZe4r/\n4x//CLktg71794bclkVZWVnIbVn88Y9/DLktC9GvXdX99CcDGWf4gaK5kU0skG2GH4gzfLFknOEH\nEvnuXJOiL+ua/m9+8xvREVRzOp0YHR31b8t4Ce0999wjOoJqTz75pOgIqjmdzqBtGcfO7bffLjpC\nVH70ox8JO7YmyzsyrikD433cA/u6y+TafvQyqqmpQU1NjegYqnDsiCXz2AGA8vJylJeXCzm26qLf\n2dnp3166dKkmYYzkdDrR1NSEpqamoJkPGWP37t3wer3wer3YvXu36DgRkX3sXLx4MeS2LGQeOwBQ\nWVmJoaEhDA0NobKy0vDjKy761w7u+vp6zcMYSfbZjuz96ANnabLN2GQfO263O+S2LGQeOwCCZvgi\nZvuKi/7nn38e9DM/yBKL/eiJSA3Vyzter1fLHIYLXJKScXkKkLsffeANm0PdvDmWyf4uS/R14tGS\neewAwCOPPBJy2yhhr95588038fXXX+Ozzz7D6OgofD4fLBaL9LdPPHXqVNB2QUGBwDTqyDzDLy0t\n9b81Ly0tFZwmMhPvsia2ZbNr1y5/zx0Zv+Mh89gBgIKCAv+yjoi6E7boP/TQQwCAQ4cO4eGHH9Y9\nEJmHjLO0CTLO8APJOMMPJPPYAcTM8CcoXt758Y9/rOmB29racODAARw6dMh/vbmRZH+LDgBbt27F\n1q1bRcdQ7ciRIzhy5IjoGKo888wzeOaZZ0THUK29vV3KRn0TZB47ALBnzx7s2bNHyLEVfzlr4o5Z\nWrh69SqGhoawbt06zZ4zUoFvy2V8iw4AIyMjoiMQkWQUz/QrKyvR3t6OtrY2vPzyy7hw4YLqgzY2\nNmJ4eBgVFRXCWqPK3lM8cIYv42xf5vMvc3aA+UUTnV9x0Xe73UhPT8eZM2ewceNGNDQ0qD7oF198\ngb6+PhQVFeHzzz+X/kogEQJn+ZzxE5FSitdsbDYbrFYrkpKSAACJiYnqDxoXh5UrVwIAZs+eDZfL\nhRkzZtzw8ampqaqPpZQRx9AT84sjc3aA+UUzOr/ioj8yMoLBwUF/
0Y/G/Pnz0dLSgqysLLhcLixZ\nsuSmj3e5XFEfMxwjjqGl+Ph4/wfg8fHx0uW/lsz5Zc4OML9oRudXvLyTnp6O119/HXfffTfOnTsX\nVc+OnJwcnDlzBgcOHEB8fLymHxIrJXtP8aysrJDbspD5/MucHWB+0UTnV1xtly1bhmXLlgEAsrOz\noy40sl4mGStkvscsgKBGU5WVlVJ9OU721sSy56foqGrDEBcXp8kyj2hlZWVSzhSA4HsYyHg/A9FN\np6Ihe8M12fMD47dolfE2rbFAde+dxsZGLXNQhKZOnRpym8gMfD6flJOdWKC66EdznT5Fr6mpKeS2\nLGRenpL93Muef+PGjSG3ZSH6Ov2wa/rHjh1Dbm4uTp8+7d/n8/nQ0tKiazAiolBkX9oULWzRnzip\n7e3teOCBB/z7L126pF8qDezcuVOz3iIZGRlSdiMkIrpW2KIf+CWqmTNn+vfH+jqy0iJdXFws7Ye5\nRESRUrymP1H8J+Tl5WmdhSIw0c/92m0ioptR/UHuLbfcomUOIiIyQNjlnWtviD7hxIkTeOyxxzQP\nRMqsX7/e389dxi+6WSwW/+dFvN6azCQpKQnDw8P+baOFnenX19fDarWitbUVH3zwAdxuN06ePAm7\n3W5EProB2e8HUFJSEnJbBmvWrAm5LYs5c+aE3JaF7Od/586dIbeNErbob9iwAdnZ2RgYGMCjjz6K\nnJwclJaWwuPxGJGPbkD0tb7RCrxrkKg7CKkVeMcmGe/e9OWXX4bclkXgfXFlvEdu4B3XRNx9Lezy\nTkpKCoDxTo6BEhIS9ElERBSGjDP8WKG44drQ0BC8Xi+sVis8Hg/6+/v1zEVEJhXJd2yUvNPi92yC\nKS76BQUF2Lt3LxISEuB2u6XqikhE8uB3bPSl+JJNu92OLVu2YMOGDdiyZQvS0tL8/zbxSTQZR3RP\n7mjJnD/waiMZrzyyWq0ht8kYmzdvDrltFE3+j1dXV2vxNERSkL33S+A9qXl/auPt3bs35LZRNLll\nFQeO9iJZ1wx39Y6INU3Z8xPpRfSkQZOiL+Nb3Fgn+7qm7PmJJisu6BERmQiLPhGRibDoExGZiCZF\nPzExUYunISIinan+IHdsbAxxceO/np+fr1kgolgg+9VHsucn/Sgu+l1dXXA4HP6f33jjDRQVFekS\niki0cEVuolDG6pVHsucn/She3nn//feDfr62AVukhoeH8eyzz+Ly5ctRPQ+RCCK+Samlp556SnQE\nEkT1mn60Xyqora3Fgw8+GNVzEIkie+8pGe/BMFnk5uaG3DZK2OWd+vp6uN1utLS04NixYwDG1/NH\nR0dVH9TlciE5OVnIXWOIiEQ6ffp0yG2jhC362dnZ8Hg8uHTpEhYvXgyfzwebzRbVnbOOHz+OtWvX\norm5WfVzEBFR5MIW/YnivmbNmqDOmtHo7OzEoUOH0NnZiczMTKSnp9/08ampqZocV9Tz6435xZE5\nO8D8scDo/wbFV+8EXrkTre3btwMYv+n69OnTwz7e5XJpdmwRz6835hdH5uwA88cCo/8bVH+Q29jY\nGNWBe3t7cfz4cZw7dy6q5yEikkngiolWqyeRUP3lrAsXLmDx4sWqD2y327Fjxw7Vv09EJKOenp6Q\n20YJW/SPHTuG3NzcoE+ZfT4fWlpadA1GRETaC1v0J67Hb29vxwMPPODff+nSJf1SERGRLsIW/ZUr\nVwIAZs+ejZkzZ/r3T506VbdQRESkD8Uf5E4U/wl5eXlaZyEiIp2pvnrnlltu0TIHEREZIOzyjtPp\nDLn/xIkTeOyxxzQPRERE+gk706+vr4fVakVrays++OADuN1unDx5Mqo2DEREJEbYmf6GDRuQkpKC\nTz75BI8++igAICcnB6+++qru4YiISFthi35KSgqA6/vnJyQk6JOIiEhikdy1DLj5ncv0uGuZ4m/k\nDg0Nwev1wmq1wuPxoL+/X9Mg
RESTgZIiLfLOZYqLfkFBAfbu3YvExESMjIxIfxMJIiIzUlz07XY7\niouL4XK5kJaWhqGhIT1zERFNWmVlZWFvSK8XxUW/tbUVp0+fxvDwMDZt2oQ33ngDGzdu1DPbDf36\niV+hx9Wr2fNpdfLTUu346wvPa/JcRER6UFz0T506hZ/+9Kc4cOAALBaL/wNeEXpcvdhTWCLs+Dey\n+eBe0RGIiG5K8TdyExMTAQAWi0W3MEREpC/FRX94eDjo57GxMc3DEBGRvhQv7+Tk5GDv3r3o7+9H\nRUUFfvCDH+iZi4iIdKC46C9YsAAZGRno7OxERkbGdV/WIuX+z69+iSu9VzV7Pq0+iJ5hn4a/Pf+i\nJs9FRLEpotslJicnY+7cuXplMY0rvVfx/x5RfadK3fypXLs/REQUm1S3ViYiIvnE3nSTYtoTv/ol\nXBouTQHaLU+l2qfhBS5PEd0Uiz5FxNV7FT/a6hMdI6R3/8XlKaJwuLxDRGQiLPpERCYibHnH6XSi\nubkZXq8XeXl5yMjIEBWFTOSXv3oCV3tdmj2fVp9HTLOn4sXnX9DkuYhuRljR7+npQVFREQCgsrKS\nRZ8McbXXBduOfNExrnP171WiI5BJCFveue+++0QdmojItISv6VdXV2Pp0qWiYxARmYLQSzZra2uR\nlZUFh8Nx08elpqZety9W2xiHyioT5hdHSfbikhK4eno0O6Zm35FIS0PZ3vCvyU0lW3Clp0uTYwIa\ntiBJc+A/e/+tyXNFQsR4FVb06+rq4HA4MG/evLCPdbmu/+AtVvvph8oqE+YXR0l2V08PErf/XwPS\nRMb1v/+jKP+Vni4U79hjQKLIlP19s5CxI+KYQop+V1cX6urqkJ2djebmZgwMDGDLli0iohARmYqQ\nou9wOPDkk0+KODQRkamxDYMgfyrnTWiIyHgs+oLEZmtlZX+I3v0Xb5lJJKvYqzwU82K34Rr/GBGF\nI/w6fSIiMg5n+mQ6HrY8IBNj0SfTicXeO/xDREbh8g4RkYlIOdNPS7XHZBuGtFS76AhkAiP/+z+i\nI5jWr5/4NXpcsdcGIy01DX994a+KHitl0f/rC89r9lzFxcUoKyvT7PmI9BaLbRjM8oeox9WDf616\nSXSM62yteVzxY6Us+rKbYZ+GP5XH3v1cZ9iniY5ARDpj0Rfgb8+/qNlz8Z0KEUWCH+QSEZkIZ/oU\nkVT7NLz7r9hbmgLGs1HsK/v7ZtERTI1FnyLygoZLUwCXp8woVvvpmwWXd4iITIQzfTKVafZUXI3B\nb79Os8t7m0eSC4s+mcqLz7+g2XNxaYpkxOUdIiITYdEnIjIRFn0iIhNh0SciMhF+kEskkWn2Gbga\ng83NptlniI5ACrHoE0nkxef/ptlz8eojcxJS9Ds6OlBTUwObzYaVK1di1qxZImIQEZmOkDX9Dz/8\nEJs2bcKGDRtw8uRJERGIiExJSNFPTk72byckJIiIQERkSkKKvs/n82+z6BMRGUdI0fd4PCIOS0Rk\nehZf4LTbIAcOHMC6desAAG+++SYeeuihGz62pqbGqFhERJPKqlWrrtsnpOhfvnwZx44dg8/nw+rV\nq+FwOIyOQERkSkKKPhERicE2DEREJsKiT0RkIqYo+l6vV7orhmTMHEj2/EST1aTvvVNVVYXW1lYU\nFhYiPT39po+NlfYQkWSur69HZ2cn4uPjsXDhQnz729++6X4jRJIfANra2nDmzBnYbDbk5+cjPj4e\nu3btwoIFC+Dz+XDlyhVs27bNgOTXczqdaG5uhtfrRV5eHjIyMm742FgZP5FkjsXxE0l+ILbHTyTn\n0bDx4zOBxsZG36VLl8I+7uDBg/7t119/Xc9IYSnNHOjw4cMR7deT0vx9fX2+xsbG6/Z//fXX/u2q\nqipNs0Xi2LFj/u1w5zFWxk8kmQPFyviJJH+sj59AsTJ+TLG8o5Ss7SHa2trw5z//GcPDw4r2x5
LG\nxkYMDw+joqIC58+f9+9PSkoCAHR3d2PmzJmi4uG+++5T/NhYGT+RZAZib/xEkj/Wxw+g/DwaNX4m\n/fJOJHyStofIzMzEb3/7W/T39yvaH0u++OIL3HrrrSgqKsLhw4eRlZUFq/WbuUhjYyPuvvtugQnH\nVVdXY+nSpTd9TKyNHyWZgdgdP0ryyzB+lJ5Ho8YPZ/oBZP7gMS4uDufOnVO8P1bExcVh5cqVAIDZ\ns2fD5XIF/fvw8DASExMFJPtGbW0tsrKywn6JMJbGj9LME2Jt/CjNL8P4AZSdR6PGj2mLfmNjI5xO\nZ9C+wLdfsbgkEipzb2+vf/vy5cth94sUKv/8+fPR0tICAHC5XJg2bZr/37xeb9CsTYS6ujo4HA7M\nmzcvaH8sj59IMsfi+Ikkf6yPnxudR5HjZ9Iv7xw9ehQtLS2YMmUKMjMzsXz5cgDjn6pbLBYsWrTI\n/9i77roLZWVl/vYQokSS+cSJExgZGcHY2Bi++93vht0fa/lzcnKwf/9+nDt3DmlpaYiL+2ZIfvnl\nl5g/f76h2QN1dXWhrq4O2dnZaG5uxsDAALZs2QIgdsdPpJljbfxEmj+Wxw9w4/MocvywDQMRkYmY\ndnmHiMiMWPSJiEyERZ+IyERY9ImITIRFn4jIRFj0iYhMZNJfp090I8eOHUN7ezsyMjKwYsUK0XGI\nDMGiT6a1YsUK9PT0oLGxUXQUIsOw6JP0Tp06hbfeegubN29GfX09kpOTsWjRIqSlpaG6uhrJyckY\nGxtDeno68vLygn73Rt9NfO+999Dd3Y34+HiMjIwgPz8ft9xyC86ePYuKigqsWrUK9957r//n+++/\nH8uXL0dXVxdqa2uRkJCA0dFRZGdn44477gAAfPTRR6iursa8efOQkpKC0dFRFBUV+Y954cIFfPrp\np4iLi4PX68Xly5fxxBNPAADOnz+PM2fOICkpCSMjI1i6dCnmzp2r0xmlSU23ps1EBnr11Vev65v+\nz3/+0zc6Our/+cSJE762tragx3R1dflqa2uD9jmdTt/x48f9P7vdbt9rr73m//m9997ztbe3+/+t\nvLz8hrleeeWVoJ9ffPFF//bHH3/s+/zzz30+33j/9/379wc99sSJEz6fz+cbHBz07dmzJ+jfXnvt\ntaD/NiKlONOnScFiseDBBx8M2tfR0YHDhw/7f/Z6vYiPj8ftt99+0+e6ePEiRkZGcPDgQQDj7wYG\nBwf9/75s2TKUl5fjZz/7GU6cOIF7773X/2/d3d2oq6uDzWaD1WpFX19f0HPPmDHDv52UlAS32w1g\nvBnXtQ0AAlGUAAABk0lEQVTGli1bBgDo7OxEf39/UJ7h4WH09vYq7qJJNIFFnyYFX4hlmgULFmDt\n2rVBTbjGxsbC/u4dd9yB//73v0FLQYG/Z7PZkJKSgv7+fnR3dwcV3rfffhsbN270d3fct29f2JwA\ncNttt6GhoQHf//73/fu++uorzJw5ExkZGZg1axYKCwv9/+b1ekM+D1E4bLhG0qusrERDQ4N/7fx7\n3/seMjMzMTg4iCNHjsBms8Hn82FoaAg//OEPMXfuXHR0dODkyZMYHBxEV1cX5s2bh6ysLCxevBgA\n0NDQgC+//BJWqxUejwcWiwXr1q3zH3NwcBB/+ctfUFhYGNQp8Z133sHVq1dhsVgwOjqKixcvYs2a\nNcjNzUVtbS2OHz+OHTt2wOFwYM+ePfB6vfjFL34BAGhqasLZs2eRkJCAsbEx2O123H///QDG/wB8\n9NFH/j9gAwMD+MlPfoKUlBRDzjFNHiz6REQmwi9nERGZCIs+EZGJsOgTEZkIiz4RkYmw6BMRmQiL\nPhGRibDoExGZCIs+EZGJ/H/h+jKGUS6fqgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x193f62518>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='relevance', y='edit_dist_st_pt_avg', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "('min() arg is an empty sequence', 'occurred at index 198')",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-195-cfb160e3f0ee>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'edit_dist_st_pd_raw'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mcalc_edit_distance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'product_description'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'edit_dist_st_bl_raw'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mcalc_edit_distance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'bullet'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'edit_dist_st_br_raw'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mcalc_edit_distance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'brand'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\linghao\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, func, axis, broadcast, raw, reduce, args, **kwds)\u001b[0m\n\u001b[0;32m   3970\u001b[0m                     \u001b[1;32mif\u001b[0m \u001b[0mreduce\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3971\u001b[0m                         \u001b[0mreduce\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3972\u001b[1;33m                     \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_apply_standard\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreduce\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mreduce\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3973\u001b[0m             \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3974\u001b[0m                 \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_apply_broadcast\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\linghao\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36m_apply_standard\u001b[1;34m(self, func, axis, ignore_failures, reduce)\u001b[0m\n\u001b[0;32m   4062\u001b[0m             \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   4063\u001b[0m                 \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4064\u001b[1;33m                     \u001b[0mresults\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   4065\u001b[0m                     \u001b[0mkeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   4066\u001b[0m             \u001b[1;32mexcept\u001b[0m \u001b[0mException\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-195-cfb160e3f0ee>\u001b[0m in \u001b[0;36m<lambda>\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'edit_dist_st_pd_raw'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mcalc_edit_distance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'product_description'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'edit_dist_st_bl_raw'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mcalc_edit_distance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'bullet'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'edit_dist_st_br_raw'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mcalc_edit_distance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'brand'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-186-59df692ee16e>\u001b[0m in \u001b[0;36mcalc_edit_distance\u001b[1;34m(row, col)\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mcalc_edit_distance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m     \u001b[0mdists\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mmin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0medit_distance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mw\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrow\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tokens_'\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mcol\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mw\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrow\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tokens_search_term'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mmin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdists\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdists\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-186-59df692ee16e>\u001b[0m in \u001b[0;36m<listcomp>\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mcalc_edit_distance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcol\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m     \u001b[0mdists\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[0mmin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0medit_distance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mw\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrow\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tokens_'\u001b[0m\u001b[1;33m+\u001b[0m\u001b[0mcol\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mw\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrow\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'tokens_search_term'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mmin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdists\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdists\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mValueError\u001b[0m: ('min() arg is an empty sequence', 'occurred at index 198')"
     ]
    }
   ],
   "source": [
    "df['edit_dist_st_pd_raw'] = df.apply(lambda x: calc_edit_distance(x, 'product_description'), axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 214,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df['edit_dist_st_pd_min'] = df['edit_dist_st_pd_raw'].map(lambda x: x[0])\n",
    "df['edit_dist_st_pd_avg'] = df['edit_dist_st_pd_raw'].map(lambda x: x[1]) / df['len_search_term']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 215,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x171a6d6a0>"
      ]
     },
     "execution_count": 215,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEPCAYAAACukxSbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X1QVOe9B/Dv7vKiSEJYkBjcSdAiBTQpVWuIjYYbtVJf\nCiVOKnmhefdOcvvH7cRpmzuZTCfTaac6c2famknu3LGdaKKJgBojIUQSCGLNGzGJLMQoaESriKwy\ngLztnvsHdze7sLhnl3POsw/n+5nJzLPLcs7Xk8OPh+ec8zwWRVEUEBGRKVhFByAiIuOw6BMRmQiL\nPhGRibDoExGZCIs+EZGJ6F70PR4P3G633rshIiIVYvTceHV1Ndrb21FUVIT09HQAgNPpRGtrKzwe\nD/Lz8+FwOPSMQEREfnQt+oWFhXA6nQHvdXV1oaSkBABQVVXFok9EZCDDx/SXL19u9C6JiOj/CbuQ\nW1NTg8WLF4vaPRGRKQkp+nV1dcjMzERaWpqI3RMRmZauY/rBNDQ0IC0tDXPnzlX1+draWp0TERFN\nTStWrBj3nq5F/9ChQ2hra8P06dORkZGB73//+2hoaEB2djZaW1vR29uLsrKykNtZuHChnjGJiKac\npqamoO/rWvRXrlw57r3nnntOz10SEdF18IlcIiITYdEnIjIRFn0iIhNh0SciMhEWfSIiE2HRJyIy\nERZ9IiITYdEnIjIRFn0iIhNh0SciMhEWfSIiE2HRJyLpOJ3OcavykTos+kQknfLycpSXl4uOISUW\nfSKSitPpREtLC1paWtjbjwCLPhFJxb+Hz95++Fj0iYhMhEWfiKSyYcOGoG1Sx/A1comIJiM3Nxc5\nOTm+NoWHRZ+IpMMefuRY9IlIOuzhR45j+kREJsKiT0RkIiz6REQmwqJPRGQiLPpERCbCok9EZCIs\n+kREJsKiT0RkIroXfY/HA7fbrfduiIhIBV2fyK2urkZ7ezuKioqQnp4OALhw4QJqa2ths9lQUFCA\nWbNm6RmBiIj86Fr0CwsLxy1y8PHHH+PBBx8EAOzbtw/FxcV6RiAiIj+Gj+knJCT42nFxcUbvnojI\n1Awv+oqi+Nos+kRExjK86POiLhGROIZPrTwwMBC0fT3Jycl6xSEiMhVdi/6hQ4fQ1taG6dOnIyMj\nA8uWLcOSJUuwa9cuKIqClStXqtqOy+XSMyYRkWnoWvSDFfX09HSUlpbquVsiIpoAn8glIjIRFn0i\nIhNh0SciMhEWfSIiE1Fd9Pv6+vTMQUREBlB9987rr7+OH//4xwHvJSUlYfbs2ZqHIiIifaju6c+Y\nMQNNTU0YGhpCU1MTTpw4gbNnz+LgwYN65iMiIg2pLvoxMTF46KGHkJeXh4ceeghDQ0PIz89X/VQt\nERGJp7rox8fHB7z2TpZms9m0TURERLpRXfRHRkZw5coVAMCVK1cwMjICAFi4cKE+yYiISHOqL+QW\nFRXh3XffxbVr15CQkICioiIAwK233qpbOCIi0pbqom+xWLB27Vo9s1CYvKuS5ebmCk4SGdnzkzg8\ndyKnenhn9+7deuagCJSXl6O8vFx0jIjJnp/E4bkTOdVF/9KlS9i9ezc+/PBD33g+ieN0OtHS0oKW\nlpZx6xDLQPb8JA7PnclRXfR/8YtfYOPGjcjKysKBAwd4f75g/r0cGXs8sucncXjuTI7qon/zzTcD\nAGbNmoVbb70VFy5c0C0UERHpQ3XR37t3L95//3288cYbSEpKwuOPP65nLgphw4YNQduykD0/icNz\nZ3JU371z4sQJPPHEE5g5c6aeeUil3Nxc5OTk+NqykT0/icNzZ3JUF/1HHnmEBT/KyN7LkT0/icNz\nJ3Kqi/4tt9wS8PrMmTO47bbbNA9E6sney5E9P4nDcydyqov+5cuX0dDQAI/HAwBobW3Fc889p1sw\nIiLSnuoLue+88w7Wrl2L2NhYrFu3Dt
nZ2XrmIiIiHagu+omJiYiNjYWiKIiLi4PVypUWiYhkE9Ys\nmwAwNDQEYHQuHiIikovqMX3vcM6CBQvwxhtv6BaIiIj0o7roL1iwAMBo8c/IyMC0adN0C0VERPqI\naGB+bME/dOiQJmGIiEhfmlyN7e3t1WIzRESkM02KPi/qEhHJgfddEhGZiOoLuVo6ffo0Pv30U3g8\nHuTn5wtbZ1f2JdeYXxyZswPA9u3bAQCPPfaY4CSRqaqqAgCsWbNGcJLIiDz+Qnr6LS0t2LBhA+6/\n/3589dVXIiIAkH/JNeYXR+bsAFBbW4va2lrRMSJWUVGBiooK0TEiJvL4Cyn6brcbHo8HHo9H2K2f\nsi+5xvziyJwdGO1len/+vD1OmVRVVaG/vx/9/f2+Hr9MRB//iIv+mTNnfO0VK1aE9b0LFizAli1b\nsHXrVsybNy/SCJMi+5JrzC+OzNkBBPQwZezt+/fwZeztiz7+Icf0KysrfW3vDJuKouDkyZP43e9+\nB2B0Xp5wfPHFF/jNb34DRVFQUVERckw/OTk5rO2rERMTE9DWYx96Yn5xZM4ejGz5/e8WtFgs0uUf\ny+j8IYv+jBkzsHr1ajQ2NiI2NhZ5eXn4/PPPJxU0Pj4ewOj/sBtvvDHk510uV8T7mkhxcbHvekJx\ncbEu+9AT84sjc3Zg9C/z9957z9eWLX9JSQl27Njha8uWX/TxDzm8s3r1agDAhQsXsGTJEsTFxeHO\nO++c1ANZGRkZqKysRGVlJebMmRPxdibDu+RaTk6OlHdgML84MmcHRu8YsVqtsFqtUt69s2bNGiQk\nJCAhIUHKu3dEH3/Vt2yOfQDLO9QTiezs7KiYj1/2JdeYXxyZswPhX4eLNvfdd5/oCJMi8virLvpu\ntxtdXV1ITU1FV1eXb4plmcnYS/PH/OLInB2Q9/58Lxl7+P5EHn/VRf/nP/85qqur0dvbi8TEROl7\nOkREZqS66MfExGDdunV6ZiEiIp2pLvojIyOorq7G4OAgZsyYgVWrVsFms+mZjYiINKa66FdWVmLl\nypWw2+24fPky9u/fj5KSEj2zERGRxlQ/kRsTEwO73Q4ASElJ4cLoREQSUl25FUUJeD08PAwAaG9v\n1zYRERHpRnXRj4+Px9GjRzE4OIiPP/4YaWlpAIAvv/xSt3BERKQt1WP6n3/+ObKzs3H27Fnfe3v2\n7MHXX3+NoqIiXcIREZG2VBf9hx9+GBkZGePeP336NAYGBoRNkUxEROqpHt4JVvC979fU1GiVh4iI\ndKTJLTiTmYdHpKqqKikXYfDavn27lItgeG3duhVbt24VHSMiMmcHgLKyMpSVlYmOEbFNmzZh06ZN\nomNETOTPriZr5I6djE0W3gUYZJ3Hw7sAg6zzqHz22WeiI0RM5uzAd3ffyaqnp0d0hEkR+bNr2pvt\nueSaWP69ZNl6zDJnBxDQw5ext+/fw5exty/6Z9e0RZ9Lronl31OWrdcsc3YgsJcvY4/fv5cvY49f\n9M+uaYs+EZEZaVL0vcsfysR/EQYZF2TwX4RBxgUxFi1aFLQtA5mzA0BsbGzQtiz8l1hVs9xqtBH9\nsxtx0R8ZGfG1CwsLNQljJC65Jtazzz4btC0DmbMDwKuvvhq0LYtXXnklaFsWon92Vd+909nZ6Zt6\nAQDeeust6WfZlLGH70/GHr4/GXvJXjJnB+Ts4fuTsYfvT+TPrkUZO5PaBPbv3x8w3cKBAwewfv16\n3YJ51dbWYuHChbrvh4hoKmlqagr6yyXi4R2VvyuIiCiKhBzeOXLkCIaGhtDW1ob6+noAo+P5Mt7q\nRURkdiGLfnZ2NtxuN86dO4f58+dDURTYbDbfgipERCSPkEXfW9xXrVqF1NRU3QMREZF+VI/p+9+5\nQ0REclJd9E+dOoXe3l709vbinXfekfLxZyIis1Nd9D/44AMMDQ3hvffew1133YWDBw/qmYuIiHSg\nuu
jffPPNsNvtmD59Om666SYkJCTomYuIiHSguugPDw/D7XYjJmb02i/v0yciko/qaRisViteffVV\nrF+/Hl1dXejt7Z3Ujk+fPo1jx47BZrOhsLBQyGPhL7zwAgDg97//veH71sLmzZsBAFu2bBGcJDKl\npaUAgF27dglOEj6ZswPy53/ggQcAAK+//rrgJJERefxVF/3i4mJ4PB5YrVZ0dHQgNzc34p1evXoV\n/f39KC4ujngbWjhx4oTQ/U9WR0eH6AhEQnCkIXJhTcNgtY5+3OFwBMyHE27xaW5uxsDAACorK4UV\nXm8vf2xbFt5e/ti2LLw9nbFtGcicHZA/v7eXP7YtC9HHX5P59MNdPejMmTO4cuUKSkpK8M033whZ\nWN3/l42MPX7/X7Ts8ZOZ+Pfy2eMPnyYLo4d74GNiYlBQUAAAmD17NlwuF1JSUib8fHJy8mTiqWLE\nPvTE/OLInB1gftGMzq9J0bdYLGF9ft68eWhra0NmZiZcLhcWLFhw3c+7XK7JxAsqKyvL18PPysrS\nZR96cjgcvh6+w+GQLv9YMueXOTsgX36LxeLraFosFunyj2V0fiFr5Obl5eHYsWPYt28fYmNjfbeB\nGsn/jh0Z797xv2NHxrt3/O9akO0OEpmzA/Ln979jR8a7d0Qff+Or7f/bsGGDqF37ZGVliY4wKQ6H\nQ3QEIiHCHV2g76heOet69u3bp9vtl1w5i4gofJqvnOWPPU4iIjlEXPQvXrzoay9evFiTMEZzOp1w\nOp2iYxAZbvPmzVI+3+H1wgsvSPl8jZfI46+66I8tjkeOHNE8jNHKy8tRXl4uOgaR4To6OqR+vuPE\niRNSPl/jJfL4qy7633zzTcBrm82meRgjOZ1OtLS0oKWlhb19MhXZn+bm0/STE/HwjoinaLXk38Nn\nb5/MRPanufk0/eSEvGXz7bffxrVr1/D1119jeHgYiqLAYrFw+UQiIgmFLPrr1q0DABw4cADr16/X\nPZBRNmzYgBdffNHXJjKL1NRUdHV1+dqyGfs0vWzGPk1vNNXDOz/96U/1zGG43Nxc5OTkICcnZ1LT\nRBPJZubMmUHbsuDT9JOj+olcEVMl6I09fCI5ydjD9yfy2SbVT+RWVVXhjjvuwMjICI4cOYIlS5Yg\nMzNT73x8IpdIY06n0ze0+fzzz/Mv3Slq0k/kDg0NIT09HceOHcMDDzyApqYmTQOKUFZWhrKyMtEx\nIlZaWirlIhheMueXObt/kZe14Mt8/AGx+VWP2dhsNlitVkybNg0AEB8fr1soowwPD4uOQERkKNU9\n/cHBQfT19fmKvuz8e/gy9vZFL7k2WTLnlzk7wPyiic6vuuinp6dj7969uOuuu3D8+HGcOnVKz1y6\n8+/ls8dPRGahenhn6dKlWLp0KQAgOzvbkIu4RESkrYimYYiJiZF+mCc2NjZom4hoKot47p3m5mYt\ncxju1VdfDdqWhegl1yZL5vzPP/980LYsZD72AI//ZEVc9E+ePKllDiFiY2PZy6ewcbI+sXj8Jyfk\nmH59fT0WLVqEzz77zPeeoihoa2vTNZgRZOzhe1VVVQW016xZIzBN+LZu3RrQfvbZZwWmCY9/h0fG\nzo/Mxx4Auru7g7ZlIvIvrJA9fe8Dux0dHcjNzUVubi7mz58/JWbZlHn1nYqKiqBtWfh3IvzbMpD9\nzi+Zjz0AXLlyJWhbJlH9cFZBQQEAYPbs2QGTM82YMUO3UEaRcS5uIrPzX8BJ9sWcRFA9pu8t/l75\n+flaZzGU7KvvJCQkBG3LgndPUaQGBweDtmUhzcNZY91www1a5jCc7KvveOdDH9uWhexDJCSO2+0O\n2iZ1Qg7vTLR+bGNjI5588knNAxERkX5C9vSPHDkCq9WK9vZ2fPTRRxgaGsLRo0dht9uNyKcb//m4\nZZyb23/FIxlXP7JarUHbRKSvkD9tGzduRHZ2Nnp7e/Hoo48iLy8P
jz32mPR/Vsm++k5/f3/Qtiw8\nHk/QNtFUJ/p6XMjhncTERADjL7bFxcXpk0gjmzdvVr3SfKiLKQ6HQ8iyZtfDcU0iOYm+nqV6wrX+\n/n54PB5YrVa43W709PTomWvS1Bbp0tJSKR9FT0pKQmdnp68tG9GLQ5uZxWLxPX9jsVgEpyGjqR5M\nXbNmDXbu3Indu3fjtddem/QToAMDA/jjH/+I8+fPT2o7ZpWSkhK0LQvRi0NPRk5OTtC2LKZPnx60\nTcYQfbuy6qJvt9tRVlaGjRs3oqysLODi4cDAQNg7rqurw+rVq8P+Phrlv6i7rAu8OxwOKXv5sh/7\n++67L2hbFosWLQraloXo46/JbRM1NTVhfd7lciEhIUH66ZlF8i5sPbYtky1btkjXywfkP/Y7duwI\n2paF7NNIiD7+mhT9cO++OHz4MO6++24tdk1ERGFQfSH3esK9GHTx4kUcOHAAFy9eREZGBtLT06/7\n+eTk5MnEC0nv7Rsh2v4NmzZtwpkzZzTZ1m233YZXXnlFk23pIdqOfbiYXyyj82tS9MP1xBNPABh9\n2vemm24K+XmXy6VrHr23b4Ro+zf86U9/UvU5tXdPRdu/z180Z1OD+cUyOr+wRyG7u7tx+PBhHD9+\nXFQEqYlefcfMZD/2zC+W6PzCir7dbsdTTz2Fn/zkJ6IiSM1/TqSJ5kcifcg+Q+umTZuCtskY27dv\nD9o2iiZFPz4+XovNUBi4ZJw4ss/Q6v9gZbQ/ZDmRXbt2SdnLB4Da2tqgbaNEPKY/MjKCmJjRby8s\nLNQsEKlz6tSpoG0i2YUzhYoa0TaNiuh5p1QX/c7OzoAlEt966y2UlJToEopCGxoaCtomCuXGG2/0\n9fBvvPFGwWnGm+pTqIimuuj/85//RFFRke81Vzsiil5qe8s9PT1STjhIkYt4eMc7YROJERsb65uh\nj7+AaaxQRXrTpk3o6elhT9mEVC2iUldXh7a2NtTX16O+vh61tbVc4k6w3/72t0HbRGpE88NupK+Q\nPf3s7Gy43W6cO3cO8+fPh6IosNls0q+cFe3CuZgVav4X/nlORF4hi763uK9atUrKZflkpaZIO51O\nvPjii/wTnYhUUz2m73/nDkWH3Nxc0RGmrKm+8hqZV8QXcpubmzF//nwtsxBFjVBF2lvo+VcWySbi\nJ3JPnjypZQ4iqbDYk6xC9vTr6+uxaNGigMUKFEVBW1ubrsGIiEh7IYu+9378jo6OgMnRzp07p18q\nIqIpKi0tDZ2dnb620UIO7xQUFCAxMRGzZ8/GzJkzff/NmDHDiHxERFOK6FlOVV/ILSgoCHidn5+v\ndRYioikvNzfX18MXcQdexHfv3HDDDVrmICKaEqL9dt+QRX+iBToaGxvx5JNPahqGiEh20T5LqKq5\nd6xWK9rb2/HRRx9haGgIR48e5TQMREQSCtnT37hxIxITE/HFF1/g0UcfBQDk5eXhzTff1D0cERFp\nK2RPPzExEcD46Xvj4uL0SURERLpR/URuf3+/b2kvt9st7dqaRERmpvrunTVr1mDnzp2Ij4/H4OAg\n1qxZo2cuIiLSgeqib7fbUVpaCpfLhdTUVPT39+uZi4iIdKB6eKe9vR379+9HTU0NLBYL3nrrLT1z\nERGRDlQX/U8//RQbNmxAYmIiLBaL7wIvERHJQ3XRj4+PBwBYLBbdwhARkb5UF/2BgYGA1yMjI5qH\nISIifam+kJuXl4edO3eip6cHlZWV+NGPfqRnruv61dPPoMvVrdn2Qs1/oVZqsh1/fWmbJtsiItKD\n6qKflZUFh8OBixcvwuFwjHtYy0hdrm7sKHpI2P4n8vD+naIjEBFdV1izbCYkJGDOnDma7NjpdKK1\ntRUejwf5+flwOByabJeIiCYW8dTKk9XV1YWSkhIAQFVVFYs+EZEBIl4YfbKWL18uatdERKYlrOh7\n1dTUYPHixaJjEBGZgtCiX1dX
h8zMTCGLAxMRmZGwMf2GhgakpaVh7ty5IT+bnJxsQCJtqMn60IOl\n6Lrs0myfmt1ympKMna9dfyWfBx8qxeUu7bID2uVPSU3GazuNXYlIpnMzGOYXS0R+IUW/s7MTDQ0N\nyM7ORmtrK3p7e1FWVjbh510ubYuMntRk7brswn/dJ+z37YT+UOEKmf9ylwv/9ohiUKLwfPCP0Pm1\nJtO5GQzziyUiv5DKk5aWhueee07EromITE34hVwiIjJO9I0xEOno3595Gle7o+96SpI9GS9ve0mT\nbRFdD4s+mcrVbhdsTxWKjjHO1f+pFh2BTIJFn0gi//7Mf+Bq92XNtqfdXyopeHnb3zTZFulL2qLP\nyc3IjK52X0b8E/8pOsY4V//3v0VHIJWkLfqcZZOIKHzSFn0iks8zz/wK3d1dmm1Pq+Epuz0V27b9\nVZNtRTsWfSIyTHd3F0qf2iE6xji7/udh0REMw6IvyB8quNwkERmPRV+Q6JyGQd0vog/+YdE5CRHp\nJfoqD0W96J17h7+MiEJh0SciUulXT/8KXa7ouxCdmpyKv76k7kI0iz4RkUpdri78Y8UromOM80jt\nJtWf5YRrREQmwqJPRGQiUg7vpCbbo/Lp19Rku6rPpdiT8IeKqzqnCV+KPUl0BEO4JZ/cbJBTHtAk\nSFn0//rSNs22VVpail27jF1i72/bXtZsWyLyyy4aZ9kM5xdRNM69w19E8uDwDhGRibDoExGZiJTD\nO0QkLzPNcxONWPSJyFCccE0sFn0KS7I9CR/8I/ruPAJGsxHR9bHoU1he0vDOI4B3HxEZjUWfTCXJ\nnhyVi5An2ZNFRyCTYNEnU3l520uabYt/pZCMeMsmEZGJsOgTEZkIiz4RkYlwTJ9IIkn2FFyNwnlu\nkuwpoiMYJpy566ORkKJ/4cIF1NbWwmazoaCgALNmzRIRg0g6L2/7m2bb4oXoyHARlQh8/PHHePDB\nB7Fx40YcPXpURAQiIlMSUvQTEhJ87bi4OBERiIhMSUjRVxTF12bRJyIyjpCi73a7ReyWiMj0LIp/\nt9sg+/btQ3FxMQDg7bffxrp16yb8bG1trVGxiIimlBUrVox7T0jRP3/+POrr66EoClauXIm0tDSj\nIxARmZKQok9ERGLwiVwiIhNh0SciMhFTFH2PxyPdHUMyZvYne36iqWrKz71TXV2N9vZ2FBUVIT09\n/bqfjZbpIcLJfOTIEVy8eBGxsbHIycnB9773veu+b4Rw8gPA6dOncezYMdhsNhQWFiI2NhZbtmxB\nVlYWFEXB5cuX8fjjjxuQfDyn04nW1lZ4PB7k5+fD4XBM+NloOX/CyRyN5084+YHoPn/COY6GnT+K\nCTQ3Nyvnzp0L+bn9+/f72nv37tUzUkhqM/s7ePBgWO/rSW3+K1euKM3NzePev3btmq9dXV2tabZw\n1NfX+9qhjmO0nD/hZPYXLedPOPmj/fzxFy3njymGd9SSdXqI06dP489//jMGBgZUvR9NmpubMTAw\ngMrKSpw4ccL3/rRp0wAAly5dwsyZM0XFw/Lly1V/NlrOn3AyA9F3/oSTP9rPH0D9cTTq/Jnywzvh\nUCSdHiIjIwO//vWv0dPTo+r9aHLmzBncfPPNKCkpwcGDB5GZmQmr9bu+SHNzM+666y6BCUfV1NRg\n8eLF1/1MtJ0/ajID0Xv+qMkvw/mj9jgadf6wp+9H5guPMTExOH78uOr3o0VMTAwKCgoAALNnz4bL\n5Qr4+sDAAOLj4wUk+05dXR0yMzNDPkQYTeeP2sxe0Xb+qM0vw/kDqDuORp0/pi36zc3NcDqdAe/5\n//kVjUMiwTJ3d3f72ufPnw/5vkjB8s+bNw9tbW0AAJfLhaSkJN/XPB5PQK9NhIaGBqSlpWHu3LkB\n70fz+RNO5mg8f8LJH+3nz0THUeT5M+WHdw4dOoS2tjZMnz4dGRkZWLZsGYDRq+oWiwW5ubm+zy
5Z\nsgS7du3yTQ8hSjiZGxsbMTg4iJGREdxxxx0h34+2/Hl5eSgvL8fx48eRmpqKmJjvTslvv/0W8+bN\nMzS7v87OTjQ0NCA7Oxutra3o7e1FWVkZgOg9f8LNHG3nT7j5o/n8ASY+jiLPH07DQERkIqYd3iEi\nMiMWfSIiE2HRJyIyERZ9IiITYdEnIjIRFn0iIhOZ8vfpE02kvr4eHR0dcDgcuOeee0THITIEiz6Z\n1j333IOuri40NzeLjkJkGBZ9kt6nn36Kd955Bw8//DCOHDmChIQE5ObmIjU1FTU1NUhISMDIyAjS\n09ORn58f8L0TPZv44Ycf4tKlS4iNjcXg4CAKCwtxww034KuvvkJlZSVWrFiBu+++2/f63nvvxbJl\ny9DZ2Ym6ujrExcVheHgY2dnZuP322wEAn3zyCWpqajB37lwkJiZieHgYJSUlvn2ePHkSX375JWJi\nYuDxeHD+/Hk8/fTTAIATJ07g2LFjmDZtGgYHB7F48WLMmTNHpyNKU5pukzYTGejNN98cN2/63//+\nd2V4eNj3urGxUTl9+nTAZzo7O5W6urqA95xOp3L48GHf66GhIWXPnj2+1x9++KHS0dHh+1pFRcWE\nud54442A1y+//LKv/fnnnyvffPONoiij87+Xl5cHfLaxsVFRFEXp6+tTduzYEfC1PXv2BPzbiNRi\nT5+mBIvFgtWrVwe8d+HCBRw8eND32uPxIDY2Frfddtt1t3Xq1CkMDg5i//79AEb/Gujr6/N9fenS\npaioqMD999+PxsZG3H333b6vXbp0CQ0NDbDZbLBarbhy5UrAtlNSUnztadOmYWhoCMDoZFxjJxhb\nunQpAODixYvo6ekJyDMwMIDu7m7Vs2gSebHo05SgBBmmycrKwtq1awMm4RoZGQn5vbfffjv+9a9/\nBQwF+X+fzWZDYmIienp6cOnSpYDC++677+KBBx7wze64e/fukDkB4JZbbkFTUxN++MMf+t47e/Ys\nZs6cCYfDgVmzZqGoqMj3NY/HE3Q7RKFwwjWSXlVVFZqamnxj5z/4wQ+QkZGBvr4+vPfee7DZbFAU\nBf39/bjzzjsxZ84cXLhwAUePHkVfXx86Ozsxd+5cZGZmYv78+QCApqYmfPvtt7BarXC73bBYLCgu\nLvbts6+vD3/5y19QVFQUMFPi+++/j6tXr8JisWB4eBinTp3CqlWrsGjRItTV1eHw4cN46qmnkJaW\nhh07dsDj8eCXv/wlAKClpQVfffUV4uLiMDIyArvdjnvvvRfA6C+ATz75xPcLrLe3Fz/72c+QmJho\nyDGmqYNCSdVuAAAAOElEQVRFn4jIRPhwFhGRibDoExGZCIs+EZGJsOgTEZkIiz4RkYmw6BMRmQiL\nPhGRibDoExGZyP8BQPFbCw337KIAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x171a6df28>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sns.boxplot(x='relevance', y='edit_dist_st_pd_avg', data=majoritize(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Remove the intermediate raw edit-distance columns; only the derived min/avg features are kept.\n",
    "# errors='ignore' keeps this cell re-runnable: on a second run the columns are already gone,\n",
    "# and without the flag the drop would raise instead of being a no-op.\n",
    "df.drop(['edit_dist_st_pt_raw', 'edit_dist_st_pd_raw'], axis=1, inplace=True, errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# df['edit_dist_st_bl_raw'] = df.apply(lambda x: calc_edit_distance(x, 'bullet'), axis=1)\n",
    "# df['edit_dist_st_br_raw'] = df.apply(lambda x: calc_edit_distance(x, 'brand'), axis=1)\n",
    "# df['edit_dist_st_bl_min'] = df['edit_dist_st_bl_raw'].map(lambda x: x[0])\n",
    "# df['edit_dist_st_bl_avg'] = df['edit_dist_st_bl_raw'].map(lambda x: x[1]) / df['len_search_term']\n",
    "# df['edit_dist_st_br_min'] = df['edit_dist_st_br_raw'].map(lambda x: x[0])\n",
    "# df['edit_dist_st_br_avg'] = df['edit_dist_st_br_raw'].map(lambda x: x[1]) / df['len_search_term']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Latent Semantic Space"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "By applying truncated SVD to the bag-of-words (BOW) and TF-IDF matrices, we obtain low-dimensional features that can be used to capture different query/product groups."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.decomposition import TruncatedSVD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Shared truncated-SVD reducer: 10 components, with a fixed random_state.\n",
    "tsvd = TruncatedSVD(\n",
    "    random_state=2016,\n",
    "    n_components=10,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### tSVD for BOW"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def add_tsvd_features(frame, reducer, matrix, prefix):\n",
    "    \"\"\"Fit `reducer` on `matrix` and write every reduced column into `frame`.\n",
    "\n",
    "    frame   -- DataFrame that receives the new columns (modified in place).\n",
    "    reducer -- object exposing fit_transform(), e.g. the shared `tsvd`.\n",
    "    matrix  -- input handed to reducer.fit_transform().\n",
    "    prefix  -- column-name prefix; reduced column j is stored as prefix + str(j).\n",
    "\n",
    "    Returns the 2-D array produced by fit_transform() so callers can keep it.\n",
    "    \"\"\"\n",
    "    reduced = reducer.fit_transform(matrix)\n",
    "    for j in range(reduced.shape[1]):\n",
    "        frame[prefix + str(j)] = reduced[:, j]\n",
    "    return reduced"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# BOW columns are named <prefix><j> with no underscore before the index.\n",
    "st_bow_tsvd = add_tsvd_features(df, tsvd, cv_of_st, 'st_bow_tsvd')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pt_bow_tsvd = add_tsvd_features(df, tsvd, cv_of_pt, 'pt_bow_tsvd')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pd_bow_tsvd = add_tsvd_features(df, tsvd, cv_of_pd, 'pd_bow_tsvd')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "bl_bow_tsvd = add_tsvd_features(df, tsvd, cv_of_bl, 'bl_bow_tsvd')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### tSVD for TF-IDF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# TF-IDF columns keep their trailing underscore: <prefix>_<j>.\n",
    "st_tfidf_tsvd = add_tsvd_features(df, tsvd, tiv_of_st, 'st_tfidf_tsvd_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "pt_tfidf_tsvd = add_tsvd_features(df, tsvd, tiv_of_pt, 'pt_tfidf_tsvd_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pd_tfidf_tsvd = add_tsvd_features(df, tsvd, tiv_of_pd, 'pd_tfidf_tsvd_')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "bl_tfidf_tsvd = add_tsvd_features(df, tsvd, tiv_of_bl, 'bl_tfidf_tsvd_')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Append"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Load the CSV whose selected columns are later copied into df under an 'old_' prefix.\n",
    "append = pd.read_csv(\n",
    "    'df_lev_dist_more_jaccard.csv',\n",
    "    encoding='ISO-8859-1',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Columns taken from `append`, grouped by name family; the overall order is unchanged.\n",
    "cols_to_append = (\n",
    "    # query_* columns\n",
    "    ['query_in_title', 'query_in_description',\n",
    "     'query_last_word_in_title', 'query_last_word_in_description']\n",
    "    # word_in_* columns\n",
    "    + ['word_in_title', 'word_in_description', 'word_in_brand']\n",
    "    # ratio_* columns\n",
    "    + ['ratio_title', 'ratio_description', 'ratio_brand']\n",
    "    # lev_dist_* columns for the product title\n",
    "    + ['lev_dist_to_product_title_min',\n",
    "       'lev_dist_to_product_title_max',\n",
    "       'lev_dist_to_product_title_sum']\n",
    "    # lev_dist_* columns for the product description\n",
    "    + ['lev_dist_to_product_description_min',\n",
    "       'lev_dist_to_product_description_max',\n",
    "       'lev_dist_to_product_description_sum']\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Copy each selected column of `append` into df, prefixing its name with 'old_'.\n",
    "for col_name in cols_to_append:\n",
    "    df['old_' + col_name] = append[col_name]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 217,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of Features:  172\n"
     ]
    }
   ],
   "source": [
    "# Columns left out of the exported frame: raw text fields, their token lists, and majority_relevance.\n",
    "cols_to_drop = [\n",
    "    #'product_uid',\n",
    "    # raw text fields\n",
    "    'search_term', 'product_title', 'product_description',\n",
    "    'brand', 'bullet', 'color', 'material',\n",
    "    # token lists\n",
    "    'tokens_search_term', 'tokens_product_title', 'tokens_product_description',\n",
    "    'tokens_brand', 'tokens_bullet',\n",
    "    'majority_relevance',\n",
    "]\n",
    "export_df = df.drop(cols_to_drop, axis=1)\n",
    "# NOTE(review): the 2 subtracted here presumably accounts for the non-feature columns\n",
    "# `id` and `relevance` -- confirm.\n",
    "print('Number of Features: ', export_df.shape[1] - 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 218,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>product_uid</th>\n",
       "      <th>relevance</th>\n",
       "      <th>bullet_count</th>\n",
       "      <th>flag_commercial</th>\n",
       "      <th>flag_residential</th>\n",
       "      <th>flag_indoor</th>\n",
       "      <th>flag_outdoor</th>\n",
       "      <th>flag_estar</th>\n",
       "      <th>match_commercial</th>\n",
       "      <th>match_residential</th>\n",
       "      <th>match_estar</th>\n",
       "      <th>match_indoor</th>\n",
       "      <th>match_outdoor</th>\n",
       "      <th>match_color</th>\n",
       "      <th>match_material</th>\n",
       "      <th>len_search_term</th>\n",
       "      <th>len_product_title</th>\n",
       "      <th>len_product_description</th>\n",
       "      <th>len_brand</th>\n",
       "      <th>len_bullet</th>\n",
       "      <th>0th_word_in_pd</th>\n",
       "      <th>1th_word_in_pd</th>\n",
       "      <th>2th_word_in_pd</th>\n",
       "      <th>3th_word_in_pd</th>\n",
       "      <th>4th_word_in_pd</th>\n",
       "      <th>5th_word_in_pd</th>\n",
       "      <th>6th_word_in_pd</th>\n",
       "      <th>7th_word_in_pd</th>\n",
       "      <th>8th_word_in_pd</th>\n",
       "      <th>9th_word_in_pd</th>\n",
       "      <th>0th_word_in_bl</th>\n",
       "      <th>1th_word_in_bl</th>\n",
       "      <th>2th_word_in_bl</th>\n",
       "      <th>3th_word_in_bl</th>\n",
       "      <th>4th_word_in_bl</th>\n",
       "      <th>5th_word_in_bl</th>\n",
       "      <th>6th_word_in_bl</th>\n",
       "      <th>7th_word_in_bl</th>\n",
       "      <th>8th_word_in_bl</th>\n",
       "      <th>9th_word_in_bl</th>\n",
       "      <th>0th_word_in_pt</th>\n",
       "      <th>1th_word_in_pt</th>\n",
       "      <th>2th_word_in_pt</th>\n",
       "      <th>3th_word_in_pt</th>\n",
       "      <th>4th_word_in_pt</th>\n",
       "      <th>5th_word_in_pt</th>\n",
       "      <th>6th_word_in_pt</th>\n",
       "      <th>7th_word_in_pt</th>\n",
       "      <th>8th_word_in_pt</th>\n",
       "      <th>9th_word_in_pt</th>\n",
       "      <th>flag_st_in_pt</th>\n",
       "      <th>flag_st_in_pd</th>\n",
       "      <th>flag_st_in_br</th>\n",
       "      <th>flag_st_in_bl</th>\n",
       "      <th>num_st_in_pt</th>\n",
       "      <th>num_st_in_pd</th>\n",
       "      <th>num_st_in_br</th>\n",
       "      <th>num_st_in_bl</th>\n",
       "      <th>ratio_st_in_pt</th>\n",
       "      <th>ratio_st_in_pd</th>\n",
       "      <th>ratio_st_in_br</th>\n",
       "      <th>ratio_st_in_bl</th>\n",
       "      <th>brand_encoded</th>\n",
       "      <th>flag_attr_has_material</th>\n",
       "      <th>flag_attr_has_color</th>\n",
       "      <th>flag_has_attr</th>\n",
       "      <th>cv_cos_sim_st_pt</th>\n",
       "      <th>cv_cos_sim_st_pd</th>\n",
       "      <th>cv_cos_sim_st_bl</th>\n",
       "      <th>jaccard_st_pt</th>\n",
       "      <th>jaccard_st_pd</th>\n",
       "      <th>jaccard_st_br</th>\n",
       "      <th>jaccard_st_bl</th>\n",
       "      <th>st_bow_tsvd0</th>\n",
       "      <th>st_bow_tsvd1</th>\n",
       "      <th>st_bow_tsvd2</th>\n",
       "      <th>st_bow_tsvd3</th>\n",
       "      <th>st_bow_tsvd4</th>\n",
       "      <th>st_bow_tsvd5</th>\n",
       "      <th>st_bow_tsvd6</th>\n",
       "      <th>st_bow_tsvd7</th>\n",
       "      <th>st_bow_tsvd8</th>\n",
       "      <th>st_bow_tsvd9</th>\n",
       "      <th>pt_bow_tsvd0</th>\n",
       "      <th>pt_bow_tsvd1</th>\n",
       "      <th>pt_bow_tsvd2</th>\n",
       "      <th>pt_bow_tsvd3</th>\n",
       "      <th>pt_bow_tsvd4</th>\n",
       "      <th>pt_bow_tsvd5</th>\n",
       "      <th>pt_bow_tsvd6</th>\n",
       "      <th>pt_bow_tsvd7</th>\n",
       "      <th>pt_bow_tsvd8</th>\n",
       "      <th>pt_bow_tsvd9</th>\n",
       "      <th>pd_bow_tsvd0</th>\n",
       "      <th>pd_bow_tsvd1</th>\n",
       "      <th>pd_bow_tsvd2</th>\n",
       "      <th>pd_bow_tsvd3</th>\n",
       "      <th>pd_bow_tsvd4</th>\n",
       "      <th>pd_bow_tsvd5</th>\n",
       "      <th>pd_bow_tsvd6</th>\n",
       "      <th>pd_bow_tsvd7</th>\n",
       "      <th>pd_bow_tsvd8</th>\n",
       "      <th>pd_bow_tsvd9</th>\n",
       "      <th>bl_bow_tsvd0</th>\n",
       "      <th>bl_bow_tsvd1</th>\n",
       "      <th>bl_bow_tsvd2</th>\n",
       "      <th>bl_bow_tsvd3</th>\n",
       "      <th>bl_bow_tsvd4</th>\n",
       "      <th>bl_bow_tsvd5</th>\n",
       "      <th>bl_bow_tsvd6</th>\n",
       "      <th>bl_bow_tsvd7</th>\n",
       "      <th>bl_bow_tsvd8</th>\n",
       "      <th>bl_bow_tsvd9</th>\n",
       "      <th>st_tfidf_tsvd_0</th>\n",
       "      <th>st_tfidf_tsvd_1</th>\n",
       "      <th>st_tfidf_tsvd_2</th>\n",
       "      <th>st_tfidf_tsvd_3</th>\n",
       "      <th>st_tfidf_tsvd_4</th>\n",
       "      <th>st_tfidf_tsvd_5</th>\n",
       "      <th>st_tfidf_tsvd_6</th>\n",
       "      <th>st_tfidf_tsvd_7</th>\n",
       "      <th>st_tfidf_tsvd_8</th>\n",
       "      <th>st_tfidf_tsvd_9</th>\n",
       "      <th>pt_tfidf_tsvd_0</th>\n",
       "      <th>pt_tfidf_tsvd_1</th>\n",
       "      <th>pt_tfidf_tsvd_2</th>\n",
       "      <th>pt_tfidf_tsvd_3</th>\n",
       "      <th>pt_tfidf_tsvd_4</th>\n",
       "      <th>pt_tfidf_tsvd_5</th>\n",
       "      <th>pt_tfidf_tsvd_6</th>\n",
       "      <th>pt_tfidf_tsvd_7</th>\n",
       "      <th>pt_tfidf_tsvd_8</th>\n",
       "      <th>pt_tfidf_tsvd_9</th>\n",
       "      <th>pd_tfidf_tsvd_0</th>\n",
       "      <th>pd_tfidf_tsvd_1</th>\n",
       "      <th>pd_tfidf_tsvd_2</th>\n",
       "      <th>pd_tfidf_tsvd_3</th>\n",
       "      <th>pd_tfidf_tsvd_4</th>\n",
       "      <th>pd_tfidf_tsvd_5</th>\n",
       "      <th>pd_tfidf_tsvd_6</th>\n",
       "      <th>pd_tfidf_tsvd_7</th>\n",
       "      <th>pd_tfidf_tsvd_8</th>\n",
       "      <th>pd_tfidf_tsvd_9</th>\n",
       "      <th>bl_tfidf_tsvd_0</th>\n",
       "      <th>bl_tfidf_tsvd_1</th>\n",
       "      <th>bl_tfidf_tsvd_2</th>\n",
       "      <th>bl_tfidf_tsvd_3</th>\n",
       "      <th>bl_tfidf_tsvd_4</th>\n",
       "      <th>bl_tfidf_tsvd_5</th>\n",
       "      <th>bl_tfidf_tsvd_6</th>\n",
       "      <th>bl_tfidf_tsvd_7</th>\n",
       "      <th>bl_tfidf_tsvd_8</th>\n",
       "      <th>bl_tfidf_tsvd_9</th>\n",
       "      <th>old_query_in_title</th>\n",
       "      <th>old_query_in_description</th>\n",
       "      <th>old_query_last_word_in_title</th>\n",
       "      <th>old_query_last_word_in_description</th>\n",
       "      <th>old_word_in_title</th>\n",
       "      <th>old_word_in_description</th>\n",
       "      <th>old_word_in_brand</th>\n",
       "      <th>old_ratio_title</th>\n",
       "      <th>old_ratio_description</th>\n",
       "      <th>old_ratio_brand</th>\n",
       "      <th>old_lev_dist_to_product_title_min</th>\n",
       "      <th>old_lev_dist_to_product_title_max</th>\n",
       "      <th>old_lev_dist_to_product_title_sum</th>\n",
       "      <th>old_lev_dist_to_product_description_min</th>\n",
       "      <th>old_lev_dist_to_product_description_max</th>\n",
       "      <th>old_lev_dist_to_product_description_sum</th>\n",
       "      <th>edit_dist_st_pt_min</th>\n",
       "      <th>edit_dist_st_pt_avg</th>\n",
       "      <th>edit_dist_st_pd_min</th>\n",
       "      <th>edit_dist_st_pd_avg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>100001</td>\n",
       "      <td>3.0</td>\n",
       "      <td>7</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>135</td>\n",
       "      <td>3</td>\n",
       "      <td>58</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>1000</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.316228</td>\n",
       "      <td>0.197814</td>\n",
       "      <td>0.096225</td>\n",
       "      <td>0.142857</td>\n",
       "      <td>0.010101</td>\n",
       "      <td>0</td>\n",
       "      <td>0.020833</td>\n",
       "      <td>0.003626</td>\n",
       "      <td>0.004019</td>\n",
       "      <td>0.000797</td>\n",
       "      <td>0.006102</td>\n",
       "      <td>0.008658</td>\n",
       "      <td>-0.000548</td>\n",
       "      <td>0.006355</td>\n",
       "      <td>-0.002796</td>\n",
       "      <td>0.007951</td>\n",
       "      <td>0.005769</td>\n",
       "      <td>0.045605</td>\n",
       "      <td>0.024936</td>\n",
       "      <td>0.026892</td>\n",
       "      <td>0.028064</td>\n",
       "      <td>0.049714</td>\n",
       "      <td>0.092839</td>\n",
       "      <td>-0.078192</td>\n",
       "      <td>-0.067007</td>\n",
       "      <td>0.050241</td>\n",
       "      <td>0.049224</td>\n",
       "      <td>3.313323</td>\n",
       "      <td>-1.080023</td>\n",
       "      <td>0.246664</td>\n",
       "      <td>0.035078</td>\n",
       "      <td>1.213365</td>\n",
       "      <td>0.344366</td>\n",
       "      <td>-0.339457</td>\n",
       "      <td>-0.778180</td>\n",
       "      <td>1.311161</td>\n",
       "      <td>-0.855836</td>\n",
       "      <td>2.016449</td>\n",
       "      <td>-0.805119</td>\n",
       "      <td>1.716349</td>\n",
       "      <td>-0.538509</td>\n",
       "      <td>1.614636</td>\n",
       "      <td>0.482738</td>\n",
       "      <td>0.415191</td>\n",
       "      <td>-0.226891</td>\n",
       "      <td>-0.103242</td>\n",
       "      <td>-0.652989</td>\n",
       "      <td>0.003235</td>\n",
       "      <td>0.001598</td>\n",
       "      <td>0.004377</td>\n",
       "      <td>0.004809</td>\n",
       "      <td>0.003127</td>\n",
       "      <td>0.001640</td>\n",
       "      <td>-0.000262</td>\n",
       "      <td>0.010862</td>\n",
       "      <td>0.003171</td>\n",
       "      <td>-0.000317</td>\n",
       "      <td>0.040224</td>\n",
       "      <td>-0.002031</td>\n",
       "      <td>-0.002297</td>\n",
       "      <td>-0.031202</td>\n",
       "      <td>0.025376</td>\n",
       "      <td>0.031313</td>\n",
       "      <td>0.013203</td>\n",
       "      <td>0.000737</td>\n",
       "      <td>-0.007120</td>\n",
       "      <td>0.007481</td>\n",
       "      <td>0.224923</td>\n",
       "      <td>-0.004951</td>\n",
       "      <td>0.010509</td>\n",
       "      <td>-0.007065</td>\n",
       "      <td>-0.100198</td>\n",
       "      <td>-0.059775</td>\n",
       "      <td>-0.120487</td>\n",
       "      <td>-0.041719</td>\n",
       "      <td>0.115705</td>\n",
       "      <td>-0.026294</td>\n",
       "      <td>0.190741</td>\n",
       "      <td>-0.004078</td>\n",
       "      <td>0.002348</td>\n",
       "      <td>-0.025698</td>\n",
       "      <td>-0.087801</td>\n",
       "      <td>0.007791</td>\n",
       "      <td>-0.176249</td>\n",
       "      <td>-0.059840</td>\n",
       "      <td>0.028053</td>\n",
       "      <td>-0.022842</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>63</td>\n",
       "      <td>3</td>\n",
       "      <td>16</td>\n",
       "      <td>321</td>\n",
       "      <td>0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>100001</td>\n",
       "      <td>2.5</td>\n",
       "      <td>7</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>135</td>\n",
       "      <td>3</td>\n",
       "      <td>58</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1000</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.001644</td>\n",
       "      <td>0.001633</td>\n",
       "      <td>0.001574</td>\n",
       "      <td>0.003842</td>\n",
       "      <td>0.002912</td>\n",
       "      <td>0.001569</td>\n",
       "      <td>0.005649</td>\n",
       "      <td>-0.003359</td>\n",
       "      <td>0.003245</td>\n",
       "      <td>0.004215</td>\n",
       "      <td>0.045605</td>\n",
       "      <td>0.024936</td>\n",
       "      <td>0.026892</td>\n",
       "      <td>0.028064</td>\n",
       "      <td>0.049714</td>\n",
       "      <td>0.092839</td>\n",
       "      <td>-0.078192</td>\n",
       "      <td>-0.067007</td>\n",
       "      <td>0.050241</td>\n",
       "      <td>0.049224</td>\n",
       "      <td>3.313323</td>\n",
       "      <td>-1.080023</td>\n",
       "      <td>0.246664</td>\n",
       "      <td>0.035078</td>\n",
       "      <td>1.213365</td>\n",
       "      <td>0.344366</td>\n",
       "      <td>-0.339456</td>\n",
       "      <td>-0.778180</td>\n",
       "      <td>1.311161</td>\n",
       "      <td>-0.855836</td>\n",
       "      <td>2.016449</td>\n",
       "      <td>-0.805119</td>\n",
       "      <td>1.716349</td>\n",
       "      <td>-0.538509</td>\n",
       "      <td>1.614636</td>\n",
       "      <td>0.482739</td>\n",
       "      <td>0.415191</td>\n",
       "      <td>-0.226891</td>\n",
       "      <td>-0.103242</td>\n",
       "      <td>-0.652989</td>\n",
       "      <td>0.001812</td>\n",
       "      <td>0.002332</td>\n",
       "      <td>0.003688</td>\n",
       "      <td>0.001637</td>\n",
       "      <td>0.004279</td>\n",
       "      <td>0.001851</td>\n",
       "      <td>0.000037</td>\n",
       "      <td>0.011753</td>\n",
       "      <td>0.004452</td>\n",
       "      <td>-0.000231</td>\n",
       "      <td>0.040224</td>\n",
       "      <td>-0.002031</td>\n",
       "      <td>-0.002297</td>\n",
       "      <td>-0.031202</td>\n",
       "      <td>0.025376</td>\n",
       "      <td>0.031313</td>\n",
       "      <td>0.013203</td>\n",
       "      <td>0.000737</td>\n",
       "      <td>-0.007120</td>\n",
       "      <td>0.007481</td>\n",
       "      <td>0.224923</td>\n",
       "      <td>-0.004951</td>\n",
       "      <td>0.010509</td>\n",
       "      <td>-0.007065</td>\n",
       "      <td>-0.100198</td>\n",
       "      <td>-0.059775</td>\n",
       "      <td>-0.120487</td>\n",
       "      <td>-0.041719</td>\n",
       "      <td>0.115705</td>\n",
       "      <td>-0.026294</td>\n",
       "      <td>0.190741</td>\n",
       "      <td>-0.004078</td>\n",
       "      <td>0.002348</td>\n",
       "      <td>-0.025698</td>\n",
       "      <td>-0.087801</td>\n",
       "      <td>0.007791</td>\n",
       "      <td>-0.176249</td>\n",
       "      <td>-0.059840</td>\n",
       "      <td>0.028053</td>\n",
       "      <td>-0.022842</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.00</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>67</td>\n",
       "      <td>3</td>\n",
       "      <td>18</td>\n",
       "      <td>337</td>\n",
       "      <td>2</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>9</td>\n",
       "      <td>100002</td>\n",
       "      <td>3.0</td>\n",
       "      <td>10</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>-1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>12</td>\n",
       "      <td>169</td>\n",
       "      <td>4</td>\n",
       "      <td>109</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0.5</td>\n",
       "      <td>1010</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.243332</td>\n",
       "      <td>0.212000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.008621</td>\n",
       "      <td>0</td>\n",
       "      <td>0.012048</td>\n",
       "      <td>0.006960</td>\n",
       "      <td>0.000900</td>\n",
       "      <td>0.005602</td>\n",
       "      <td>0.005818</td>\n",
       "      <td>-0.000712</td>\n",
       "      <td>0.003775</td>\n",
       "      <td>-0.004111</td>\n",
       "      <td>0.010913</td>\n",
       "      <td>0.014781</td>\n",
       "      <td>-0.027090</td>\n",
       "      <td>0.052576</td>\n",
       "      <td>0.029210</td>\n",
       "      <td>0.058717</td>\n",
       "      <td>-0.054237</td>\n",
       "      <td>0.038212</td>\n",
       "      <td>0.104605</td>\n",
       "      <td>0.018579</td>\n",
       "      <td>-0.144403</td>\n",
       "      <td>-0.038084</td>\n",
       "      <td>0.025078</td>\n",
       "      <td>3.088265</td>\n",
       "      <td>-0.928897</td>\n",
       "      <td>-0.779630</td>\n",
       "      <td>-0.235617</td>\n",
       "      <td>1.022412</td>\n",
       "      <td>-6.070258</td>\n",
       "      <td>-0.177288</td>\n",
       "      <td>1.629826</td>\n",
       "      <td>1.669294</td>\n",
       "      <td>1.009680</td>\n",
       "      <td>2.418942</td>\n",
       "      <td>-0.412886</td>\n",
       "      <td>1.744545</td>\n",
       "      <td>-1.499353</td>\n",
       "      <td>-3.124326</td>\n",
       "      <td>-3.041471</td>\n",
       "      <td>2.646309</td>\n",
       "      <td>1.225488</td>\n",
       "      <td>0.994887</td>\n",
       "      <td>2.065224</td>\n",
       "      <td>0.001645</td>\n",
       "      <td>0.004407</td>\n",
       "      <td>0.006753</td>\n",
       "      <td>-0.001348</td>\n",
       "      <td>0.007957</td>\n",
       "      <td>0.000271</td>\n",
       "      <td>-0.005846</td>\n",
       "      <td>0.015219</td>\n",
       "      <td>0.042325</td>\n",
       "      <td>0.009750</td>\n",
       "      <td>0.046026</td>\n",
       "      <td>-0.003515</td>\n",
       "      <td>0.005800</td>\n",
       "      <td>-0.003947</td>\n",
       "      <td>-0.017649</td>\n",
       "      <td>0.021120</td>\n",
       "      <td>0.043252</td>\n",
       "      <td>0.012502</td>\n",
       "      <td>0.010272</td>\n",
       "      <td>-0.001656</td>\n",
       "      <td>0.280601</td>\n",
       "      <td>0.264614</td>\n",
       "      <td>-0.309222</td>\n",
       "      <td>0.395171</td>\n",
       "      <td>0.065995</td>\n",
       "      <td>0.230039</td>\n",
       "      <td>0.065034</td>\n",
       "      <td>-0.131033</td>\n",
       "      <td>0.016887</td>\n",
       "      <td>-0.094675</td>\n",
       "      <td>0.256366</td>\n",
       "      <td>-0.025741</td>\n",
       "      <td>0.073359</td>\n",
       "      <td>0.695745</td>\n",
       "      <td>0.252360</td>\n",
       "      <td>-0.039998</td>\n",
       "      <td>-0.089243</td>\n",
       "      <td>-0.013464</td>\n",
       "      <td>0.089968</td>\n",
       "      <td>0.028354</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.25</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>120</td>\n",
       "      <td>2</td>\n",
       "      <td>18</td>\n",
       "      <td>337</td>\n",
       "      <td>2</td>\n",
       "      <td>2.5</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id  product_uid  relevance  bullet_count  flag_commercial  \\\n",
       "0   2       100001        3.0             7               -1   \n",
       "1   3       100001        2.5             7               -1   \n",
       "2   9       100002        3.0            10               -1   \n",
       "\n",
       "   flag_residential  flag_indoor  flag_outdoor  flag_estar  match_commercial  \\\n",
       "0                -1           -1            -1          -1                 0   \n",
       "1                -1           -1            -1          -1                 0   \n",
       "2                -1           -1            -1          -1                 0   \n",
       "\n",
       "   match_residential  match_estar  match_indoor  match_outdoor  match_color  \\\n",
       "0                  0            0             0              0            0   \n",
       "1                  0            0             0              0            0   \n",
       "2                  0            0             0              0            0   \n",
       "\n",
       "   match_material  len_search_term  len_product_title  \\\n",
       "0               0                2                  6   \n",
       "1               1                2                  6   \n",
       "2               0                2                 12   \n",
       "\n",
       "   len_product_description  len_brand  len_bullet  0th_word_in_pd  \\\n",
       "0                      135          3          58               1   \n",
       "1                      135          3          58               1   \n",
       "2                      169          4         109               1   \n",
       "\n",
       "   1th_word_in_pd  2th_word_in_pd  3th_word_in_pd  4th_word_in_pd  \\\n",
       "0               0               0               0               0   \n",
       "1               0               0               0               0   \n",
       "2               0               0               0               0   \n",
       "\n",
       "   5th_word_in_pd  6th_word_in_pd  7th_word_in_pd  8th_word_in_pd  \\\n",
       "0               0               0               0               0   \n",
       "1               0               0               0               0   \n",
       "2               0               0               0               0   \n",
       "\n",
       "   9th_word_in_pd  0th_word_in_bl  1th_word_in_bl  2th_word_in_bl  \\\n",
       "0               0               1               0               0   \n",
       "1               0               1               0               0   \n",
       "2               0               1               1               0   \n",
       "\n",
       "   3th_word_in_bl  4th_word_in_bl  5th_word_in_bl  6th_word_in_bl  \\\n",
       "0               0               0               0               0   \n",
       "1               0               0               0               0   \n",
       "2               0               0               0               0   \n",
       "\n",
       "   7th_word_in_bl  8th_word_in_bl  9th_word_in_bl  0th_word_in_pt  \\\n",
       "0               0               0               0               1   \n",
       "1               0               0               0               1   \n",
       "2               0               0               0               1   \n",
       "\n",
       "   1th_word_in_pt  2th_word_in_pt  3th_word_in_pt  4th_word_in_pt  \\\n",
       "0               0               0               0               0   \n",
       "1               0               0               0               0   \n",
       "2               0               0               0               0   \n",
       "\n",
       "   5th_word_in_pt  6th_word_in_pt  7th_word_in_pt  8th_word_in_pt  \\\n",
       "0               0               0               0               0   \n",
       "1               0               0               0               0   \n",
       "2               0               0               0               0   \n",
       "\n",
       "   9th_word_in_pt  flag_st_in_pt  flag_st_in_pd  flag_st_in_br  flag_st_in_bl  \\\n",
       "0               0              0              0              0              0   \n",
       "1               0              0              0              0              0   \n",
       "2               0              0              0              0              0   \n",
       "\n",
       "   num_st_in_pt  num_st_in_pd  num_st_in_br  num_st_in_bl  ratio_st_in_pt  \\\n",
       "0             1             1             0             1             0.5   \n",
       "1             0             0             0             0             0.0   \n",
       "2             0             1             0             1             0.0   \n",
       "\n",
       "   ratio_st_in_pd  ratio_st_in_br  ratio_st_in_bl  brand_encoded  \\\n",
       "0             0.5               0             0.5           1000   \n",
       "1             0.0               0             0.0           1000   \n",
       "2             0.5               0             0.5           1010   \n",
       "\n",
       "   flag_attr_has_material  flag_attr_has_color  flag_has_attr  \\\n",
       "0                       1                    0              1   \n",
       "1                       1                    0              1   \n",
       "2                       0                    1              1   \n",
       "\n",
       "   cv_cos_sim_st_pt  cv_cos_sim_st_pd  cv_cos_sim_st_bl  jaccard_st_pt  \\\n",
       "0          0.316228          0.197814          0.096225       0.142857   \n",
       "1          0.000000          0.000000          0.000000       0.000000   \n",
       "2          0.000000          0.243332          0.212000       0.000000   \n",
       "\n",
       "   jaccard_st_pd  jaccard_st_br  jaccard_st_bl  st_bow_tsvd0  st_bow_tsvd1  \\\n",
       "0       0.010101              0       0.020833      0.003626      0.004019   \n",
       "1       0.000000              0       0.000000      0.001644      0.001633   \n",
       "2       0.008621              0       0.012048      0.006960      0.000900   \n",
       "\n",
       "   st_bow_tsvd2  st_bow_tsvd3  st_bow_tsvd4  st_bow_tsvd5  st_bow_tsvd6  \\\n",
       "0      0.000797      0.006102      0.008658     -0.000548      0.006355   \n",
       "1      0.001574      0.003842      0.002912      0.001569      0.005649   \n",
       "2      0.005602      0.005818     -0.000712      0.003775     -0.004111   \n",
       "\n",
       "   st_bow_tsvd7  st_bow_tsvd8  st_bow_tsvd9  pt_bow_tsvd0  pt_bow_tsvd1  \\\n",
       "0     -0.002796      0.007951      0.005769      0.045605      0.024936   \n",
       "1     -0.003359      0.003245      0.004215      0.045605      0.024936   \n",
       "2      0.010913      0.014781     -0.027090      0.052576      0.029210   \n",
       "\n",
       "   pt_bow_tsvd2  pt_bow_tsvd3  pt_bow_tsvd4  pt_bow_tsvd5  pt_bow_tsvd6  \\\n",
       "0      0.026892      0.028064      0.049714      0.092839     -0.078192   \n",
       "1      0.026892      0.028064      0.049714      0.092839     -0.078192   \n",
       "2      0.058717     -0.054237      0.038212      0.104605      0.018579   \n",
       "\n",
       "   pt_bow_tsvd7  pt_bow_tsvd8  pt_bow_tsvd9  pd_bow_tsvd0  pd_bow_tsvd1  \\\n",
       "0     -0.067007      0.050241      0.049224      3.313323     -1.080023   \n",
       "1     -0.067007      0.050241      0.049224      3.313323     -1.080023   \n",
       "2     -0.144403     -0.038084      0.025078      3.088265     -0.928897   \n",
       "\n",
       "   pd_bow_tsvd2  pd_bow_tsvd3  pd_bow_tsvd4  pd_bow_tsvd5  pd_bow_tsvd6  \\\n",
       "0      0.246664      0.035078      1.213365      0.344366     -0.339457   \n",
       "1      0.246664      0.035078      1.213365      0.344366     -0.339456   \n",
       "2     -0.779630     -0.235617      1.022412     -6.070258     -0.177288   \n",
       "\n",
       "   pd_bow_tsvd7  pd_bow_tsvd8  pd_bow_tsvd9  bl_bow_tsvd0  bl_bow_tsvd1  \\\n",
       "0     -0.778180      1.311161     -0.855836      2.016449     -0.805119   \n",
       "1     -0.778180      1.311161     -0.855836      2.016449     -0.805119   \n",
       "2      1.629826      1.669294      1.009680      2.418942     -0.412886   \n",
       "\n",
       "   bl_bow_tsvd2  bl_bow_tsvd3  bl_bow_tsvd4  bl_bow_tsvd5  bl_bow_tsvd6  \\\n",
       "0      1.716349     -0.538509      1.614636      0.482738      0.415191   \n",
       "1      1.716349     -0.538509      1.614636      0.482739      0.415191   \n",
       "2      1.744545     -1.499353     -3.124326     -3.041471      2.646309   \n",
       "\n",
       "   bl_bow_tsvd7  bl_bow_tsvd8  bl_bow_tsvd9  st_tfidf_tsvd_0  st_tfidf_tsvd_1  \\\n",
       "0     -0.226891     -0.103242     -0.652989         0.003235         0.001598   \n",
       "1     -0.226891     -0.103242     -0.652989         0.001812         0.002332   \n",
       "2      1.225488      0.994887      2.065224         0.001645         0.004407   \n",
       "\n",
       "   st_tfidf_tsvd_2  st_tfidf_tsvd_3  st_tfidf_tsvd_4  st_tfidf_tsvd_5  \\\n",
       "0         0.004377         0.004809         0.003127         0.001640   \n",
       "1         0.003688         0.001637         0.004279         0.001851   \n",
       "2         0.006753        -0.001348         0.007957         0.000271   \n",
       "\n",
       "   st_tfidf_tsvd_6  st_tfidf_tsvd_7  st_tfidf_tsvd_8  st_tfidf_tsvd_9  \\\n",
       "0        -0.000262         0.010862         0.003171        -0.000317   \n",
       "1         0.000037         0.011753         0.004452        -0.000231   \n",
       "2        -0.005846         0.015219         0.042325         0.009750   \n",
       "\n",
       "   pt_tfidf_tsvd_0  pt_tfidf_tsvd_1  pt_tfidf_tsvd_2  pt_tfidf_tsvd_3  \\\n",
       "0         0.040224        -0.002031        -0.002297        -0.031202   \n",
       "1         0.040224        -0.002031        -0.002297        -0.031202   \n",
       "2         0.046026        -0.003515         0.005800        -0.003947   \n",
       "\n",
       "   pt_tfidf_tsvd_4  pt_tfidf_tsvd_5  pt_tfidf_tsvd_6  pt_tfidf_tsvd_7  \\\n",
       "0         0.025376         0.031313         0.013203         0.000737   \n",
       "1         0.025376         0.031313         0.013203         0.000737   \n",
       "2        -0.017649         0.021120         0.043252         0.012502   \n",
       "\n",
       "   pt_tfidf_tsvd_8  pt_tfidf_tsvd_9  pd_tfidf_tsvd_0  pd_tfidf_tsvd_1  \\\n",
       "0        -0.007120         0.007481         0.224923        -0.004951   \n",
       "1        -0.007120         0.007481         0.224923        -0.004951   \n",
       "2         0.010272        -0.001656         0.280601         0.264614   \n",
       "\n",
       "   pd_tfidf_tsvd_2  pd_tfidf_tsvd_3  pd_tfidf_tsvd_4  pd_tfidf_tsvd_5  \\\n",
       "0         0.010509        -0.007065        -0.100198        -0.059775   \n",
       "1         0.010509        -0.007065        -0.100198        -0.059775   \n",
       "2        -0.309222         0.395171         0.065995         0.230039   \n",
       "\n",
       "   pd_tfidf_tsvd_6  pd_tfidf_tsvd_7  pd_tfidf_tsvd_8  pd_tfidf_tsvd_9  \\\n",
       "0        -0.120487        -0.041719         0.115705        -0.026294   \n",
       "1        -0.120487        -0.041719         0.115705        -0.026294   \n",
       "2         0.065034        -0.131033         0.016887        -0.094675   \n",
       "\n",
       "   bl_tfidf_tsvd_0  bl_tfidf_tsvd_1  bl_tfidf_tsvd_2  bl_tfidf_tsvd_3  \\\n",
       "0         0.190741        -0.004078         0.002348        -0.025698   \n",
       "1         0.190741        -0.004078         0.002348        -0.025698   \n",
       "2         0.256366        -0.025741         0.073359         0.695745   \n",
       "\n",
       "   bl_tfidf_tsvd_4  bl_tfidf_tsvd_5  bl_tfidf_tsvd_6  bl_tfidf_tsvd_7  \\\n",
       "0        -0.087801         0.007791        -0.176249        -0.059840   \n",
       "1        -0.087801         0.007791        -0.176249        -0.059840   \n",
       "2         0.252360        -0.039998        -0.089243        -0.013464   \n",
       "\n",
       "   bl_tfidf_tsvd_8  bl_tfidf_tsvd_9  old_query_in_title  \\\n",
       "0         0.028053        -0.022842                   0   \n",
       "1         0.028053        -0.022842                   0   \n",
       "2         0.089968         0.028354                   0   \n",
       "\n",
       "   old_query_in_description  old_query_last_word_in_title  \\\n",
       "0                         0                             0   \n",
       "1                         0                             0   \n",
       "2                         0                             0   \n",
       "\n",
       "   old_query_last_word_in_description  old_word_in_title  \\\n",
       "0                                   0                  1   \n",
       "1                                   0                  1   \n",
       "2                                   0                  1   \n",
       "\n",
       "   old_word_in_description  old_word_in_brand  old_ratio_title  \\\n",
       "0                        1                  0              0.5   \n",
       "1                        1                  0              0.5   \n",
       "2                        1                  1              0.5   \n",
       "\n",
       "   old_ratio_description  old_ratio_brand  old_lev_dist_to_product_title_min  \\\n",
       "0                    0.5             0.00                                  3   \n",
       "1                    0.5             0.00                                  3   \n",
       "2                    0.5             0.25                                  2   \n",
       "\n",
       "   old_lev_dist_to_product_title_max  old_lev_dist_to_product_title_sum  \\\n",
       "0                                  7                                 63   \n",
       "1                                  7                                 67   \n",
       "2                                  7                                120   \n",
       "\n",
       "   old_lev_dist_to_product_description_min  \\\n",
       "0                                        3   \n",
       "1                                        3   \n",
       "2                                        2   \n",
       "\n",
       "   old_lev_dist_to_product_description_max  \\\n",
       "0                                       16   \n",
       "1                                       18   \n",
       "2                                       18   \n",
       "\n",
       "   old_lev_dist_to_product_description_sum  edit_dist_st_pt_min  \\\n",
       "0                                      321                    0   \n",
       "1                                      337                    2   \n",
       "2                                      337                    2   \n",
       "\n",
       "   edit_dist_st_pt_avg  edit_dist_st_pd_min  edit_dist_st_pd_avg  \n",
       "0                  3.0                    0                  2.0  \n",
       "1                  4.0                    1                  2.5  \n",
       "2                  2.5                    0                  1.0  "
      ]
     },
     "execution_count": 218,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first three rows of the export table before writing it out.\n",
    "export_df.iloc[:3]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 219,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Write export_df to CSV with pandas defaults, so the row index is\n",
    "# emitted as the first (unnamed) column of the file.\n",
    "export_df.to_csv(path_or_buf='./df_new_423.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 220,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Write df to CSV with pandas defaults, so the row index is emitted\n",
    "# as the first (unnamed) column of the file.\n",
    "df.to_csv(path_or_buf='./df_full_423.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
